Skip to content

Commit

Permalink
[ML] Use random sampler in field stats aggs
Browse files Browse the repository at this point in the history
  • Loading branch information
qn895 committed Jul 19, 2022
1 parent f99264c commit c8c5ab6
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 10 deletions.
8 changes: 8 additions & 0 deletions x-pack/packages/ml/agg_utils/src/build_sampler_aggregation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ export function buildSamplerAggregation(
},
};
}

/**
 * Resolves where the aggregation results live inside the Elasticsearch
 * response, expressed as an array of path segments.
 *
 * @param samplerShardSize - shard size used to decide whether sampling is
 * active. Only a value greater than 0 counts as "sampling enabled".
 * @returns `['sample']` when sampling is enabled, otherwise an empty array.
 */
export function getRandomSamplerAggregationsResponsePath(samplerShardSize: number): string[] {
  if (samplerShardSize > 0) {
    return ['sample'];
  }

  return [];
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* 2.0.
*/

import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';

export const SAMPLER_TOP_TERMS_THRESHOLD = 100000;
export const SAMPLER_TOP_TERMS_SHARD_SIZE = 5000;
export const AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE = 200;
Expand All @@ -15,3 +17,29 @@ export const MAX_CHART_COLUMNS = 20;
export const MAX_EXAMPLES_DEFAULT = 10;
export const MAX_PERCENT = 100;
export const PERCENTILE_SPACING = 5;

/**
 * Wraps the supplied aggregations in a `random_sampler` aggregation keyed as
 * `sample`.
 *
 * @param aggs - aggregations to nest under the sampler.
 * @param probability - sampling probability. A value that is not within
 * `(0, 1]` (including `NaN`) means "no sampling" and `aggs` is returned as-is.
 * @param seed - optional seed for the sampler. It is forwarded whenever it is
 * a finite number, including `0`; otherwise it is left out of the request.
 * @returns either `aggs` untouched, or `{ sample: { aggs, random_sampler } }`.
 */
export function buildRandomSamplerAggregation(
  aggs: any,
  probability: number,
  seed?: number
): Record<string, estypes.AggregationsAggregationContainer> {
  // Positive range check (rather than `<= 0 || > 1`) so that NaN is also
  // treated as "no sampling" instead of being sent in the request.
  const isSamplingProbability = probability > 0 && probability <= 1;
  if (!isSamplingProbability) {
    return aggs;
  }

  return {
    sample: {
      aggs,
      // @ts-expect-error AggregationsAggregationContainer needs to be updated with random_sampler
      random_sampler: {
        probability,
        // A truthiness check here would silently discard a seed of 0.
        ...(Number.isFinite(seed) ? { seed } : {}),
      },
    },
  };
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ import type {
ISearchOptions,
ISearchStart,
} from '@kbn/data-plugin/public';
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';

import { getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { buildRandomSamplerAggregation } from './constants';
import type {
Field,
BooleanFieldStats,
Expand Down Expand Up @@ -48,7 +48,7 @@ export const getBooleanFieldsStatsRequest = (
});
const searchBody = {
query,
aggs: buildSamplerAggregation(aggs, samplerShardSize),
aggs: buildRandomSamplerAggregation(aggs, 0.05),
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ import type {
ISearchOptions,
ISearchStart,
} from '@kbn/data-plugin/public';
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { buildRandomSamplerAggregation } from './constants';
import type { FieldStatsCommonRequestParams } from '../../../../../common/types/field_stats';
import type { Field, DateFieldStats, Aggs } from '../../../../../common/types/field_stats';
import { FieldStatsError, isIKibanaSearchResponse } from '../../../../../common/types/field_stats';
Expand Down Expand Up @@ -45,7 +46,7 @@ export const getDateFieldsStatsRequest = (

const searchBody = {
query,
aggs: buildSamplerAggregation(aggs, samplerShardSize),
aggs: buildRandomSamplerAggregation(aggs, 0.05),
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
};
return {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ import {
ISearchOptions,
} from '@kbn/data-plugin/common';
import type { ISearchStart } from '@kbn/data-plugin/public';
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import {
buildRandomSamplerAggregation,
MAX_PERCENT,
PERCENTILE_SPACING,
SAMPLER_TOP_TERMS_SHARD_SIZE,
Expand Down Expand Up @@ -101,7 +102,7 @@ export const getNumericFieldsStatsRequest = (

const searchBody = {
query,
aggs: buildSamplerAggregation(aggs, samplerShardSize),
aggs: buildRandomSamplerAggregation(aggs, 0.05),
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@ import type {
ISearchOptions,
ISearchStart,
} from '@kbn/data-plugin/public';
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { SAMPLER_TOP_TERMS_SHARD_SIZE, SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
import {
buildRandomSamplerAggregation,
SAMPLER_TOP_TERMS_SHARD_SIZE,
SAMPLER_TOP_TERMS_THRESHOLD,
} from './constants';
import type {
Aggs,
Bucket,
Expand All @@ -34,6 +38,7 @@ export const getStringFieldStatsRequest = (
) => {
const { index, query, runtimeFieldMap, samplerShardSize } = params;

console.log('params', params);
const size = 0;

const aggs: Aggs = {};
Expand Down Expand Up @@ -67,10 +72,12 @@ export const getStringFieldStatsRequest = (

const searchBody = {
query,
aggs: buildSamplerAggregation(aggs, samplerShardSize),
aggs: buildRandomSamplerAggregation(aggs, 0.05),
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
};

console.log('searchBody', searchBody);

return {
index,
size,
Expand Down

0 comments on commit c8c5ab6

Please sign in to comment.