Skip to content

Commit

Permalink
Fix aggregations to work with Tuples
Browse files Browse the repository at this point in the history
  • Loading branch information
peterdemartini committed Dec 21, 2020
1 parent 64d8032 commit 746b878
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 43 deletions.
46 changes: 29 additions & 17 deletions packages/data-mate/src/aggregation-frame/AggregationFrame.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
KeyAggregation, ValueAggregation, valueAggMap,
FieldAgg, KeyAggFn, keyAggMap, makeUniqueKeyAgg
} from '../column';
import { isNumberLike } from '../vector';
import { getCommonTupleType, isNumberLike } from '../vector';
import { Builder } from '../builder';
import { getBuilderForField, getMaxColumnSize } from './utils';
import {
Expand Down Expand Up @@ -128,10 +128,9 @@ export class AggregationFrame<
field: keyof T,
as?: A
): this|AggregationFrame<WithAlias<T, A, number>> {
const { name, type } = this._ensureColumn(field, as);
if (!isNumberLike(type)) {
throw new Error(`${ValueAggregation.avg} requires a numeric field type`);
}
const { name } = this._ensureColumn(field, as);
this._ensureNumericLike(name, ValueAggregation.avg);

const aggObject = this._aggregations.get(name) ?? { };
aggObject.value = ValueAggregation.avg;
this._aggregations.set(name, aggObject);
Expand All @@ -154,10 +153,9 @@ export class AggregationFrame<
field: keyof T,
as?: A
): this|AggregationFrame<WithAlias<T, A, number>> {
const { name, type } = this._ensureColumn(field, as);
if (!isNumberLike(type)) {
throw new Error(`${ValueAggregation.sum} requires a numeric field type`);
}
const { name } = this._ensureColumn(field, as);
this._ensureNumericLike(name, ValueAggregation.sum);

const aggObject = this._aggregations.get(name) ?? { };
aggObject.value = ValueAggregation.sum;
this._aggregations.set(name, aggObject);
Expand All @@ -180,10 +178,9 @@ export class AggregationFrame<
field: keyof T,
as?: A
): this|AggregationFrame<WithAlias<T, A, number>> {
const { name, type } = this._ensureColumn(field, as);
if (!isNumberLike(type)) {
throw new Error(`${ValueAggregation.min} requires a numeric field type`);
}
const { name } = this._ensureColumn(field, as);
this._ensureNumericLike(name, ValueAggregation.min);

const aggObject = this._aggregations.get(name) ?? { };
aggObject.value = ValueAggregation.min;
this._aggregations.set(name, aggObject);
Expand All @@ -206,10 +203,9 @@ export class AggregationFrame<
field: keyof T,
as?: A
): this|AggregationFrame<WithAlias<T, A, number>> {
const { name, type } = this._ensureColumn(field, as);
if (!isNumberLike(type)) {
throw new Error(`${ValueAggregation.max} requires a numeric field type`);
}
const { name } = this._ensureColumn(field, as);
this._ensureNumericLike(name, ValueAggregation.max);

const aggObject = this._aggregations.get(name) ?? { };
aggObject.value = ValueAggregation.max;
this._aggregations.set(name, aggObject);
Expand Down Expand Up @@ -485,6 +481,22 @@ export class AggregationFrame<
};
}

private _ensureNumericLike(field: keyof T, ctx: string): void {
const column = this.getColumnOrThrow(field);
let fieldType: FieldType;
if (column.config.type === FieldType.Tuple) {
fieldType = getCommonTupleType(
field as string, column.vector.childConfig
);
} else {
fieldType = column.config.type as FieldType;
}

if (!isNumberLike(fieldType)) {
throw new Error(`${ctx} requires a numeric field type`);
}
}

/**
* Execute the aggregations and flatten the grouped data.
* Assigns the new columns to this.
Expand Down
9 changes: 7 additions & 2 deletions packages/data-mate/src/aggregation-frame/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Builder } from '../builder';
import { Column, ValueAggregation, KeyAggregation } from '../column';
import { WritableData } from '../core';
import {
isNumberLike, isFloatLike
isNumberLike, isFloatLike, getCommonTupleType
} from '../vector';

export function getBuilderForField(
Expand All @@ -29,7 +29,12 @@ export function getBuilderForField(
});
}

const currentType = col.config.type as FieldType;
let currentType = col.config.type as FieldType;
if (currentType === FieldType.Tuple) {
currentType = getCommonTupleType(
col.name, col.vector.childConfig
);
}
let type: FieldType|undefined;
if (valueAgg === ValueAggregation.avg) {
if (currentType === FieldType.Long) {
Expand Down
78 changes: 54 additions & 24 deletions packages/data-mate/src/vector/utils.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import {
DataTypeFields,
FieldType
} from '@terascope/types';
import {
isNumber, isBigInt, getTypeOf, isArrayLike
isNumber, isBigInt, getTypeOf, isArrayLike, TSError
} from '@terascope/utils';
import { ListVector } from './ListVector';
import {
Expand Down Expand Up @@ -90,34 +91,34 @@ export function getNumericValues(value: unknown): {
values: bigint[],
type: 'bigint'
} {
if (value == null) {
return { values: [], type: 'number' };
}

if (isNumber(value)) {
return { values: [value], type: 'number' };
}
if (isBigInt(value)) {
return { values: [value], type: 'bigint' };
}
let type: 'number'|'bigint' = 'number';
const values: any[] = [];
function processValue(v: unknown): void {
if (v == null) return;

if (isArrayLike(value)) {
let type: 'number'|'bigint' = 'number';
const values: any[] = [];
for (const v of value) {
if (v == null) continue;
if (type === 'number' && isBigInt(v)) {
type = 'bigint';
if (isArrayLike(v)) {
for (const nested of v) {
processValue(nested);
}
values.push(v);
return;
}

if (!isNumber(v) && !isBigInt(v)) {
throw new Error(`Invalid to numeric values in ${v} (${getTypeOf(v)})`);
}

if (type === 'number' && isBigInt(v)) {
type = 'bigint';
}
return {
values,
type
};

values.push(v);
}
processValue(value);

throw new Error(`Unable to get numeric values from ${value} (${getTypeOf(value)})`);
return {
values,
type
};
}

export function isNumberLike(type: FieldType): boolean {
Expand All @@ -138,3 +139,32 @@ export function isIntLike(type: FieldType): boolean {
if (type === FieldType.Integer) return true;
return false;
}

export function getCommonTupleType(
tupleField: string, childConfig: DataTypeFields|undefined
): FieldType {
let fieldType: FieldType|undefined;
for (const config of Object.values(childConfig ?? {})) {
const type = config.type as FieldType;

if (!fieldType || type === fieldType) {
fieldType = type;
} else if (isIntLike(fieldType) && isIntLike(type)) {
fieldType = FieldType.Integer;
} else if (isFloatLike(fieldType) && isFloatLike(type)) {
fieldType = FieldType.Float;
} else {
throw new TSError(
`Field "${tupleField}" has conflicting field types, ${fieldType} incompatible with ${type}`,
{ statusCode: 400, context: { safe: true } }
);
}
}
if (!fieldType) {
throw new TSError(
`Field "${tupleField}" has no child fields`,
{ statusCode: 400, context: { safe: true } }
);
}
return fieldType;
}
30 changes: 30 additions & 0 deletions packages/data-mate/test/aggregation-frame-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,21 @@ describe('AggregationFrame', () => {
}
]);
});

it('should get the right result when using tuple', async () => {
const grouped = dataFrame.createTupleFrom(['age'], 'age_tuple').aggregate();
const resultFrame = await grouped.sum('age_tuple').run();
expect(resultFrame.toJSON()).toEqual([
{
name: 'Billy',
age: 64,
age_tuple: 310,
gender: 'M',
scores: [4, 9, 3],
date: '2020-09-15T17:39:11.195Z'
}
]);
});
});

describe('->sum(scores)', () => {
Expand All @@ -135,6 +150,21 @@ describe('AggregationFrame', () => {
}
]);
});

it('should get the right result when using tuple', async () => {
const grouped = dataFrame.createTupleFrom(['scores'], 'score_tuple').aggregate();
const resultFrame = await grouped.sum('score_tuple').run();
expect(resultFrame.toJSON()).toEqual([
{
name: 'Billy',
age: 64,
gender: 'M',
scores: [4, 9, 3],
score_tuple: 177,
date: '2020-09-15T17:39:11.195Z'
}
]);
});
});

describe('->avg(age)', () => {
Expand Down

0 comments on commit 746b878

Please sign in to comment.