diff --git a/x-pack/packages/ml/agg_utils/src/build_sampler_aggregation.ts b/x-pack/packages/ml/agg_utils/src/build_sampler_aggregation.ts
index 30345b00caf2f..57f75381c71b3 100644
--- a/x-pack/packages/ml/agg_utils/src/build_sampler_aggregation.ts
+++ b/x-pack/packages/ml/agg_utils/src/build_sampler_aggregation.ts
@@ -29,3 +29,11 @@ export function buildSamplerAggregation(
     },
   };
 }
+
+// Returns the path of aggregations in the elasticsearch response, as an array,
+// depending on whether sampling is being used.
+// A supplied samplerShardSize (the shard_size parameter of the sampler aggregation)
+// of less than 1 indicates no sampling, and an empty array is returned.
+export function getRandomSamplerAggregationsResponsePath(samplerShardSize: number): string[] {
+  return samplerShardSize > 0 ? ['sample'] : [];
+}
diff --git a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/constants.ts b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/constants.ts
index 6da11fd850acc..723c0246c0caa 100644
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/constants.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/constants.ts
@@ -5,6 +5,8 @@
  * 2.0.
  */
 
+import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
+
 export const SAMPLER_TOP_TERMS_THRESHOLD = 100000;
 export const SAMPLER_TOP_TERMS_SHARD_SIZE = 5000;
 export const AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE = 200;
@@ -15,3 +17,37 @@ export const MAX_CHART_COLUMNS = 20;
 export const MAX_EXAMPLES_DEFAULT = 10;
 export const MAX_PERCENT = 100;
 export const PERCENTILE_SPACING = 5;
+
+/**
+ * Wraps the supplied aggregations in a random_sampler aggregation named `sample`.
+ *
+ * A supplied probability outside the range (0, 1] indicates no sampling,
+ * and the aggs are returned as-is.
+ *
+ * @param aggs Aggregations to nest under the random sampler.
+ * @param probability Sampling probability passed to random_sampler; Elasticsearch may reject values it does not support.
+ * @param seed Optional seed passed to random_sampler; omitted from the request when undefined.
+ * @returns The wrapped aggregations, or the original aggs when not sampling.
+ */
+export function buildRandomSamplerAggregation(
+  aggs: any,
+  probability: number,
+  seed?: number
+): Record<string, estypes.AggregationsAggregationContainer> {
+  // Negated in-range check so that NaN is also treated as "no sampling".
+  if (!(probability > 0 && probability <= 1)) {
+    return aggs;
+  }
+
+  return {
+    sample: {
+      aggs,
+      // @ts-expect-error AggregationsAggregationContainer needs to be updated with random_sampler
+      random_sampler: {
+        probability,
+        // Compare against undefined so that a seed of 0 is not silently dropped.
+        ...(seed !== undefined ? { seed } : {}),
+      },
+    },
+  };
+}
diff --git a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_boolean_field_stats.ts b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_boolean_field_stats.ts
index 5b91d3716ffd9..32b47a1985437 100644
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_boolean_field_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_boolean_field_stats.ts
@@ -14,9 +14,9 @@ import type {
   ISearchOptions,
   ISearchStart,
 } from '@kbn/data-plugin/public';
-import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
 import { isPopulatedObject } from '@kbn/ml-is-populated-object';
-
+import { getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
+import { buildRandomSamplerAggregation } from './constants';
 import type {
   Field,
   BooleanFieldStats,
@@ -48,7 +48,7 @@ export const getBooleanFieldsStatsRequest = (
   });
   const searchBody = {
     query,
-    aggs: buildSamplerAggregation(aggs, samplerShardSize),
+    aggs: buildRandomSamplerAggregation(aggs, 0.05),
     ...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
   };
 
diff --git a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_date_field_stats.ts b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_date_field_stats.ts
index 1f55f8117c1be..9eda3e50b93f6 100644
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_date_field_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_date_field_stats.ts
@@ -15,8 +15,9 @@ import type {
   ISearchOptions,
   ISearchStart,
 } from '@kbn/data-plugin/public';
-import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
 import { isPopulatedObject } from '@kbn/ml-is-populated-object';
+import { getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
+import { buildRandomSamplerAggregation } from './constants';
 import type { FieldStatsCommonRequestParams } from '../../../../../common/types/field_stats';
 import type { Field, DateFieldStats, Aggs } from '../../../../../common/types/field_stats';
 import { FieldStatsError, isIKibanaSearchResponse } from '../../../../../common/types/field_stats';
@@ -45,7 +46,7 @@ export const getDateFieldsStatsRequest = (
 
   const searchBody = {
     query,
-    aggs: buildSamplerAggregation(aggs, samplerShardSize),
+    aggs: buildRandomSamplerAggregation(aggs, 0.05),
     ...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
   };
   return {
diff --git a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_numeric_field_stats.ts b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_numeric_field_stats.ts
index 033f4469b0bc2..e4e8db0ba2e3e 100644
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_numeric_field_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_numeric_field_stats.ts
@@ -16,9 +16,10 @@ import {
   ISearchOptions,
 } from '@kbn/data-plugin/common';
 import type { ISearchStart } from '@kbn/data-plugin/public';
-import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
 import { isPopulatedObject } from '@kbn/ml-is-populated-object';
+import { getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
 import {
+  buildRandomSamplerAggregation,
   MAX_PERCENT,
   PERCENTILE_SPACING,
   SAMPLER_TOP_TERMS_SHARD_SIZE,
@@ -101,7 +102,7 @@ export const getNumericFieldsStatsRequest = (
 
   const searchBody = {
     query,
-    aggs: buildSamplerAggregation(aggs, samplerShardSize),
+    aggs: buildRandomSamplerAggregation(aggs, 0.05),
     ...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
   };
 
diff --git a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_string_field_stats.ts b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_string_field_stats.ts
index 60306ded5d8f4..690342516d5e8 100644
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_string_field_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_string_field_stats.ts
@@ -15,9 +15,13 @@ import type {
   ISearchOptions,
   ISearchStart,
 } from '@kbn/data-plugin/public';
-import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
+import { getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
 import { isPopulatedObject } from '@kbn/ml-is-populated-object';
-import { SAMPLER_TOP_TERMS_SHARD_SIZE, SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
+import {
+  buildRandomSamplerAggregation,
+  SAMPLER_TOP_TERMS_SHARD_SIZE,
+  SAMPLER_TOP_TERMS_THRESHOLD,
+} from './constants';
 import type {
   Aggs,
   Bucket,
@@ -67,10 +71,10 @@ export const getStringFieldStatsRequest = (
 
   const searchBody = {
     query,
-    aggs: buildSamplerAggregation(aggs, samplerShardSize),
+    aggs: buildRandomSamplerAggregation(aggs, 0.05),
     ...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
   };
 
   return {
     index,
     size,