Skip to content

Commit

Permalink
Merge pull request #2387 from terascope/data-frame-agg-fixes
Browse files Browse the repository at this point in the history
DataFrame Aggs now work with Tuples
  • Loading branch information
peterdemartini authored Dec 21, 2020
2 parents f9632da + c0fcbb0 commit 340dbbe
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 50 deletions.
2 changes: 1 addition & 1 deletion packages/data-mate/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@terascope/data-mate",
"displayName": "Data-Mate",
"version": "0.19.3",
"version": "0.19.4",
"description": "Library of data validations/transformations",
"homepage": "https://github.com/terascope/teraslice/tree/master/packages/data-mate#readme",
"repository": {
Expand Down
49 changes: 30 additions & 19 deletions packages/data-mate/src/aggregation-frame/AggregationFrame.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
KeyAggregation, ValueAggregation, valueAggMap,
FieldAgg, KeyAggFn, keyAggMap, makeUniqueKeyAgg
} from '../column';
import { isNumberLike } from '../vector';
import { getCommonTupleType, isNumberLike } from '../vector';
import { Builder } from '../builder';
import { getBuilderForField, getMaxColumnSize } from './utils';
import {
Expand Down Expand Up @@ -128,10 +128,9 @@ export class AggregationFrame<
field: keyof T,
as?: A
): this|AggregationFrame<WithAlias<T, A, number>> {
const { name, type } = this._ensureColumn(field, as);
if (!isNumberLike(type)) {
throw new Error(`${ValueAggregation.avg} requires a numeric field type`);
}
const { name } = this._ensureColumn(field, as);
this._ensureNumericLike(name, ValueAggregation.avg);

const aggObject = this._aggregations.get(name) ?? { };
aggObject.value = ValueAggregation.avg;
this._aggregations.set(name, aggObject);
Expand All @@ -154,10 +153,9 @@ export class AggregationFrame<
field: keyof T,
as?: A
): this|AggregationFrame<WithAlias<T, A, number>> {
const { name, type } = this._ensureColumn(field, as);
if (!isNumberLike(type)) {
throw new Error(`${ValueAggregation.sum} requires a numeric field type`);
}
const { name } = this._ensureColumn(field, as);
this._ensureNumericLike(name, ValueAggregation.sum);

const aggObject = this._aggregations.get(name) ?? { };
aggObject.value = ValueAggregation.sum;
this._aggregations.set(name, aggObject);
Expand All @@ -180,10 +178,9 @@ export class AggregationFrame<
field: keyof T,
as?: A
): this|AggregationFrame<WithAlias<T, A, number>> {
const { name, type } = this._ensureColumn(field, as);
if (!isNumberLike(type)) {
throw new Error(`${ValueAggregation.min} requires a numeric field type`);
}
const { name } = this._ensureColumn(field, as);
this._ensureNumericLike(name, ValueAggregation.min);

const aggObject = this._aggregations.get(name) ?? { };
aggObject.value = ValueAggregation.min;
this._aggregations.set(name, aggObject);
Expand All @@ -206,10 +203,9 @@ export class AggregationFrame<
field: keyof T,
as?: A
): this|AggregationFrame<WithAlias<T, A, number>> {
const { name, type } = this._ensureColumn(field, as);
if (!isNumberLike(type)) {
throw new Error(`${ValueAggregation.max} requires a numeric field type`);
}
const { name } = this._ensureColumn(field, as);
this._ensureNumericLike(name, ValueAggregation.max);

const aggObject = this._aggregations.get(name) ?? { };
aggObject.value = ValueAggregation.max;
this._aggregations.set(name, aggObject);
Expand Down Expand Up @@ -461,8 +457,7 @@ export class AggregationFrame<
name: keyof T,
type: FieldType
} {
const col = this.getColumn(field);
if (!col) throw new Error(`Unknown column named "${field}"`);
const col = this.getColumnOrThrow(field);

if (as) {
const columns: Column<any, keyof T>[] = [];
Expand All @@ -486,6 +481,22 @@ export class AggregationFrame<
};
}

private _ensureNumericLike(field: keyof T, ctx: string): void {
const column = this.getColumnOrThrow(field);
let fieldType: FieldType;
if (column.config.type === FieldType.Tuple) {
fieldType = getCommonTupleType(
field as string, column.vector.childConfig
);
} else {
fieldType = column.config.type as FieldType;
}

if (!isNumberLike(fieldType)) {
throw new Error(`${ctx} requires a numeric field type`);
}
}

/**
* Execute the aggregations and flatten the grouped data.
* Assigns the new columns to this.
Expand Down
9 changes: 7 additions & 2 deletions packages/data-mate/src/aggregation-frame/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Builder } from '../builder';
import { Column, ValueAggregation, KeyAggregation } from '../column';
import { WritableData } from '../core';
import {
isNumberLike, isFloatLike
isNumberLike, isFloatLike, getCommonTupleType
} from '../vector';

export function getBuilderForField(
Expand All @@ -29,7 +29,12 @@ export function getBuilderForField(
});
}

const currentType = col.config.type as FieldType;
let currentType = col.config.type as FieldType;
if (currentType === FieldType.Tuple) {
currentType = getCommonTupleType(
col.name, col.vector.childConfig
);
}
let type: FieldType|undefined;
if (valueAgg === ValueAggregation.avg) {
if (currentType === FieldType.Long) {
Expand Down
78 changes: 54 additions & 24 deletions packages/data-mate/src/vector/utils.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import {
DataTypeFields,
FieldType
} from '@terascope/types';
import {
isNumber, isBigInt, getTypeOf, isArrayLike
isNumber, isBigInt, getTypeOf, isArrayLike, TSError
} from '@terascope/utils';
import { ListVector } from './ListVector';
import {
Expand Down Expand Up @@ -90,34 +91,34 @@ export function getNumericValues(value: unknown): {
values: bigint[],
type: 'bigint'
} {
if (value == null) {
return { values: [], type: 'number' };
}

if (isNumber(value)) {
return { values: [value], type: 'number' };
}
if (isBigInt(value)) {
return { values: [value], type: 'bigint' };
}
let type: 'number'|'bigint' = 'number';
const values: any[] = [];
function processValue(v: unknown): void {
if (v == null) return;

if (isArrayLike(value)) {
let type: 'number'|'bigint' = 'number';
const values: any[] = [];
for (const v of value) {
if (v == null) continue;
if (type === 'number' && isBigInt(v)) {
type = 'bigint';
if (isArrayLike(v)) {
for (const nested of v) {
processValue(nested);
}
values.push(v);
return;
}

if (!isNumber(v) && !isBigInt(v)) {
throw new Error(`Invalid to numeric values in ${v} (${getTypeOf(v)})`);
}

if (type === 'number' && isBigInt(v)) {
type = 'bigint';
}
return {
values,
type
};

values.push(v);
}
processValue(value);

throw new Error(`Unable to get numeric values from ${value} (${getTypeOf(value)})`);
return {
values,
type
};
}

export function isNumberLike(type: FieldType): boolean {
Expand All @@ -138,3 +139,32 @@ export function isIntLike(type: FieldType): boolean {
if (type === FieldType.Integer) return true;
return false;
}

export function getCommonTupleType(
tupleField: string, childConfig: DataTypeFields|undefined
): FieldType {
let fieldType: FieldType|undefined;
for (const config of Object.values(childConfig ?? {})) {
const type = config.type as FieldType;

if (!fieldType || type === fieldType) {
fieldType = type;
} else if (isIntLike(fieldType) && isIntLike(type)) {
fieldType = FieldType.Integer;
} else if (isFloatLike(fieldType) && isFloatLike(type)) {
fieldType = FieldType.Float;
} else {
throw new TSError(
`Field "${tupleField}" has conflicting field types, ${fieldType} incompatible with ${type}`,
{ statusCode: 400, context: { safe: true } }
);
}
}
if (!fieldType) {
throw new TSError(
`Field "${tupleField}" has no child fields`,
{ statusCode: 400, context: { safe: true } }
);
}
return fieldType;
}
30 changes: 30 additions & 0 deletions packages/data-mate/test/aggregation-frame-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,21 @@ describe('AggregationFrame', () => {
}
]);
});

it('should get the right result when using tuple', async () => {
const grouped = dataFrame.createTupleFrom(['age'], 'age_tuple').aggregate();
const resultFrame = await grouped.sum('age_tuple').run();
expect(resultFrame.toJSON()).toEqual([
{
name: 'Billy',
age: 64,
age_tuple: 310,
gender: 'M',
scores: [4, 9, 3],
date: '2020-09-15T17:39:11.195Z'
}
]);
});
});

describe('->sum(scores)', () => {
Expand All @@ -135,6 +150,21 @@ describe('AggregationFrame', () => {
}
]);
});

it('should get the right result when using tuple', async () => {
const grouped = dataFrame.createTupleFrom(['scores'], 'score_tuple').aggregate();
const resultFrame = await grouped.sum('score_tuple').run();
expect(resultFrame.toJSON()).toEqual([
{
name: 'Billy',
age: 64,
gender: 'M',
scores: [4, 9, 3],
score_tuple: 177,
date: '2020-09-15T17:39:11.195Z'
}
]);
});
});

describe('->avg(age)', () => {
Expand Down
4 changes: 2 additions & 2 deletions packages/elasticsearch-store/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "elasticsearch-store",
"displayName": "Elasticsearch Store",
"version": "0.40.3",
"version": "0.40.4",
"description": "An API for managing an elasticsearch index, with versioning and migration support.",
"homepage": "https://github.com/terascope/teraslice/tree/master/packages/elasticsearch-store#readme",
"bugs": {
Expand All @@ -24,7 +24,7 @@
"test:watch": "ts-scripts test --watch . --"
},
"dependencies": {
"@terascope/data-mate": "^0.19.3",
"@terascope/data-mate": "^0.19.4",
"@terascope/data-types": "^0.25.1",
"@terascope/types": "^0.7.0",
"@terascope/utils": "^0.34.1",
Expand Down
4 changes: 2 additions & 2 deletions packages/ts-transforms/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "ts-transforms",
"displayName": "TS Transforms",
"version": "0.48.3",
"version": "0.48.4",
"description": "An ETL framework built upon xlucene-evaluator",
"homepage": "https://github.com/terascope/teraslice/tree/master/packages/ts-transforms#readme",
"bugs": {
Expand Down Expand Up @@ -35,7 +35,7 @@
"test:watch": "ts-scripts test --watch . --"
},
"dependencies": {
"@terascope/data-mate": "^0.19.3",
"@terascope/data-mate": "^0.19.4",
"@terascope/types": "^0.7.0",
"@terascope/utils": "^0.34.1",
"awesome-phonenumber": "^2.43.0",
Expand Down

0 comments on commit 340dbbe

Please sign in to comment.