From 57e626ed07f8bcacd0360a4ef025c9d23fde590c Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 8 Dec 2020 14:13:20 -0500 Subject: [PATCH] Optimize date_historam's hard_bounds (#66051) This allows `date_histogram`s with `hard_bounds` and `extended_bounds` to use the "as range" style optimizations introducedin #63643. There isn't any work to do for `exended_bounds` besides add a test. For `hard_bounds` we have to be careful when constructing the ranges that to filter. --- .../histogram/DateHistogramAggregator.java | 30 ++++++++---- .../histogram/InternalDateHistogram.java | 2 +- .../DateHistogramAggregatorTests.java | 46 ++++++++++++++++++- 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 61b8162f3f500..55b3b8c5f4e0b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -148,9 +148,6 @@ private static FromDateRange adaptIntoRangeOrNull( CardinalityUpperBound cardinality, Map metadata ) throws IOException { - if (hardBounds != null || extendedBounds != null) { - return null; - } long[] fixedRoundingPoints = preparedRounding.fixedRoundingPoints(); if (fixedRoundingPoints == null) { return null; @@ -169,11 +166,7 @@ private static FromDateRange adaptIntoRangeOrNull( if (rangeSupplier == null) { return null; } - RangeAggregator.Range[] ranges = new RangeAggregator.Range[fixedRoundingPoints.length]; - for (int i = 0; i < fixedRoundingPoints.length - 1; i++) { - ranges[i] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[i], (double) fixedRoundingPoints[i + 1]); - } - ranges[ranges.length - 1] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[fixedRoundingPoints.length - 1], null); + RangeAggregator.Range[] ranges = ranges(hardBounds, fixedRoundingPoints); return new DateHistogramAggregator.FromDateRange( parent, factories, @@ -200,6 +193,27 @@ private static FromDateRange adaptIntoRangeOrNull( ); } + private static RangeAggregator.Range[] ranges(LongBounds hardBounds, long[] fixedRoundingPoints) { + if (hardBounds == null) { + RangeAggregator.Range[] ranges = new RangeAggregator.Range[fixedRoundingPoints.length]; + for (int i = 0; i < fixedRoundingPoints.length - 1; i++) { + ranges[i] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[i], (double) fixedRoundingPoints[i + 1]); + } + ranges[ranges.length - 1] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[fixedRoundingPoints.length - 1], null); + return ranges; + } + List ranges = new ArrayList<>(fixedRoundingPoints.length); + for (int i = 0; i < fixedRoundingPoints.length - 1; i++) { + if (hardBounds.contain(fixedRoundingPoints[i])) { + ranges.add(new RangeAggregator.Range(null, (double) fixedRoundingPoints[i], (double) fixedRoundingPoints[i + 1])); + } + } + if (hardBounds.contain(fixedRoundingPoints[fixedRoundingPoints.length - 1])) { + ranges.add(new RangeAggregator.Range(null, (double) fixedRoundingPoints[fixedRoundingPoints.length - 1], null)); + } + return ranges.toArray(RangeAggregator.Range[]::new); + } + private final ValuesSource.Numeric valuesSource; private final DocValueFormat formatter; private final Rounding rounding; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalDateHistogram.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalDateHistogram.java index eb2d3c036603e..0017ea7a7a666 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalDateHistogram.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalDateHistogram.java @@ -367,7 +367,7 @@ private void addEmptyBuckets(List list, ReduceContext reduceContext) { LongBounds bounds = emptyBucketInfo.bounds; ListIterator iter = list.listIterator(); - // first adding all the empty buckets *before* the actual data (based on th extended_bounds.min the user requested) + // first adding all the empty buckets *before* the actual data (based on the extended_bounds.min the user requested) InternalAggregations reducedEmptySubAggs = InternalAggregations.reduce(Collections.singletonList(emptyBucketInfo.subAggregations), reduceContext); if (bounds != null) { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java index 0dc026ecd42a3..13c964d4e8380 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java @@ -36,8 +36,10 @@ import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext; import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; import org.hamcrest.Matcher; @@ -1196,9 +1198,44 @@ public void testMissingValueDoesNotUseFromRange() throws IOException { ); } + public void testExtendedBoundsUsesFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(false, true, DateFormatter.forPattern("yyyy")), + List.of("2017", "2018"), + List.of("2016", "2017", "2018", "2019"), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE) + .calendarInterval(DateHistogramInterval.YEAR) + .extendedBounds(new LongBounds("2016", "2019")) + .minDocCount(0), + true + ); + } + + public void testHardBoundsUsesFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(false, true, DateFormatter.forPattern("yyyy")), + List.of("2016", "2017", "2018", "2019"), + List.of("2017", "2018"), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE) + .calendarInterval(DateHistogramInterval.YEAR) + .hardBounds(new LongBounds("2017", "2019")), + true + ); + } + + private void aggregationImplementationChoiceTestCase( + DateFieldMapper.DateFieldType ft, + List data, + DateHistogramAggregationBuilder builder, + boolean usesFromRange + ) throws IOException { + aggregationImplementationChoiceTestCase(ft, data, data, builder, usesFromRange); + } + private void aggregationImplementationChoiceTestCase( DateFieldMapper.DateFieldType ft, List data, + List resultingBucketKeys, DateHistogramAggregationBuilder builder, boolean usesFromRange ) throws IOException { @@ -1220,7 +1257,14 @@ private void aggregationImplementationChoiceTestCase( agg.preCollection(); context.searcher().search(context.query(), agg); InternalDateHistogram result = (InternalDateHistogram) agg.buildTopLevel(); - assertThat(result.getBuckets().stream().map(InternalDateHistogram.Bucket::getKeyAsString).collect(toList()), equalTo(data)); + result = (InternalDateHistogram) result.reduce( + List.of(result), + ReduceContext.forFinalReduction(context.bigArrays(), null, context.multiBucketConsumer(), PipelineTree.EMPTY) + ); + assertThat( + result.getBuckets().stream().map(InternalDateHistogram.Bucket::getKeyAsString).collect(toList()), + equalTo(resultingBucketKeys) + ); } } }