diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index a163ec0b37f87..1a64dde2b6fb0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -135,7 +135,7 @@ public static FiltersAggregator build( CardinalityUpperBound cardinality, Map<String, Object> metadata ) throws IOException { - if (canUseFilterByFilter(parent, factories, otherBucketKey)) { + if (canUseFilterByFilter(parent, otherBucketKey)) { return buildFilterByFilter(name, factories, filters, keyed, otherBucketKey, context, parent, cardinality, metadata); } return new FiltersAggregator.Compatible( @@ -155,8 +155,8 @@ public static FiltersAggregator build( * Can this aggregation be executed using the {@link FilterByFilter}? That * aggregator is much faster than the fallback {@link Compatible} aggregator. */ - public static boolean canUseFilterByFilter(Aggregator parent, AggregatorFactories factories, String otherBucketKey) { - return parent == null && factories.countAggregators() == 0 && otherBucketKey == null; + public static boolean canUseFilterByFilter(Aggregator parent, String otherBucketKey) { + return parent == null && otherBucketKey == null; } /** @@ -177,7 +177,7 @@ public static FilterByFilter buildFilterByFilter( CardinalityUpperBound cardinality, Map<String, Object> metadata ) throws IOException { - if (false == canUseFilterByFilter(parent, factories, otherBucketKey)) { + if (false == canUseFilterByFilter(parent, otherBucketKey)) { throw new IllegalStateException("Can't execute filter-by-filter"); } List<QueryToFilterAdapter<?>> filtersWithTopLevel = new ArrayList<>(filters.size()); @@ -186,6 +186,7 @@ public static FilterByFilter buildFilterByFilter( } return new FiltersAggregator.FilterByFilter( name, + factories, filtersWithTopLevel, keyed, context, @@ -274,9 +275,12 @@ public static class FilterByFilter extends FiltersAggregator { * field. */ private int segmentsWithDocCountField; + private int segmentsCollected; + private int segmentsCounted; private FilterByFilter( String name, + AggregatorFactories factories, List<QueryToFilterAdapter<?>> filters, boolean keyed, AggregationContext context, @@ -284,7 +288,7 @@ private FilterByFilter( CardinalityUpperBound cardinality, Map<String, Object> metadata ) throws IOException { - super(name, AggregatorFactories.EMPTY, filters, keyed, null, context, parent, cardinality, metadata); + super(name, factories, filters, keyed, null, context, parent, cardinality, metadata); this.profiling = context.profiling(); } @@ -294,6 +298,7 @@ private FilterByFilter( */ @SuppressWarnings("resource") // We're not in charge of anything Closeable public long estimateCost(long maxCost) throws IOException { + // TODO if we have children we should use a different cost estimate this.maxCost = maxCost; if (estimatedCost != -1) { return estimatedCost; } @@ -303,7 +308,9 @@ public long estimateCost(long maxCost) throws IOException { for (LeafReaderContext ctx : searcher().getIndexReader().leaves()) { CheckedSupplier<Boolean, IOException> canUseMetadata = canUseMetadata(ctx); for (QueryToFilterAdapter<?> filter : filters()) { - estimatedCost += filter.estimateCountCost(ctx, canUseMetadata); + estimatedCost += subAggregators().length > 0 + ? filter.estimateCollectCost(ctx) + : filter.estimateCountCost(ctx, canUseMetadata); if (estimatedCost < 0) { // We've overflowed so we cap out and stop counting.
estimatedCost = Long.MAX_VALUE; @@ -345,20 +352,82 @@ public long estimateCost(long maxCost) throws IOException { @Override protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { Bits live = ctx.reader().getLiveDocs(); - Counter counter = new Counter(docCountProvider); if (false == docCountProvider.alwaysOne()) { segmentsWithDocCountField++; } - for (int filterOrd = 0; filterOrd < filters().size(); filterOrd++) { - incrementBucketDocCount(filterOrd, filters().get(filterOrd).count(ctx, counter, live)); + if (subAggregators.length == 0) { + // TODO we'd be better off if we could do sub.isNoop() or something. + /* + * Without sub.isNoop we always end up in the `collectXXX` modes even if + * the sub-aggregators opt out of traditional collection. + */ + segmentsCounted++; + collectCount(ctx, live); + } else { + segmentsCollected++; + collectSubs(ctx, live, sub); } // Throwing this exception is how we communicate to the collection mechanism that we don't need the segment. throw new CollectionTerminatedException(); } + /** + * Gather a count of the number of documents that match each filter + * without sending any documents to a sub-aggregator. This yields + * the correct response when there aren't any sub-aggregators or they + * all opt out of needing any sort of collection. + */ + private void collectCount(LeafReaderContext ctx, Bits live) throws IOException { + Counter counter = new Counter(docCountProvider); + for (int filterOrd = 0; filterOrd < filters().size(); filterOrd++) { + incrementBucketDocCount(filterOrd, filters().get(filterOrd).count(ctx, counter, live)); + } + } + + /** + * Collect the documents that match each filter and send them to + * the sub-aggregators. This method is only required when there are + * sub-aggregators that haven't opted out of being collected. + * <p>

+ * This collects each filter one at a time, resetting the + * sub-aggregators between each filter as though they were hitting + * a fresh segment. + * <p>

+ * It's very tempting to try and collect the + * filters into blocks of matches and then replay the whole block + * in ascending order without the resetting. That'd probably + * work better if the disk was very, very slow and we didn't have + * any kind of disk caching. But with disk caching it's about twice + * as fast to collect each filter one by one like this. And it uses + * less memory because there isn't a need to buffer a block of matches. + * And it's a hell of a lot less code. + */ + private void collectSubs(LeafReaderContext ctx, Bits live, LeafBucketCollector sub) throws IOException { + class MatchCollector implements LeafCollector { + LeafBucketCollector subCollector = sub; + int filterOrd; + + @Override + public void collect(int docId) throws IOException { + collectBucket(subCollector, docId, filterOrd); + } + + @Override + public void setScorer(Scorable scorer) throws IOException { + } + } + MatchCollector collector = new MatchCollector(); + filters().get(0).collect(ctx, collector, live); + for (int filterOrd = 1; filterOrd < filters().size(); filterOrd++) { + collector.subCollector = collectableSubAggregators.getLeafCollector(ctx); + collector.filterOrd = filterOrd; + filters().get(filterOrd).collect(ctx, collector, live); + } + } + @Override public void collectDebugInfo(BiConsumer<String, Object> add) { super.collectDebugInfo(add); + add.accept("segments_counted", segmentsCounted); + add.accept("segments_collected", segmentsCollected); add.accept("segments_with_deleted_docs", segmentsWithDeletedDocs); add.accept("segments_with_doc_count_field", segmentsWithDocCountField); if (estimatedCost != -1) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java index f930124469bcd..c4297e2028cc0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java @@ -17,6 +17,7 @@ import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; @@ -216,12 +217,31 @@ long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) * Estimate the cost of calling {@code #count} in a leaf. */ long estimateCountCost(LeafReaderContext ctx, CheckedSupplier<Boolean, IOException> canUseMetadata) throws IOException { + return estimateCollectCost(ctx); + } + + /** + * Collect all documents that match this filter in this leaf. + */ + void collect(LeafReaderContext ctx, LeafCollector collector, Bits live) throws IOException { + BulkScorer scorer = bulkScorer(ctx, () -> {}); + if (scorer == null) { + // No hits in this segment. + return; + } + scorer.score(collector, live); + } + + /** + * Estimate the cost of calling {@code #collect} in a leaf.
+ */ + long estimateCollectCost(LeafReaderContext ctx) throws IOException { BulkScorer scorer = bulkScorer(ctx, () -> scorersPreparedWhileEstimatingCost++); if (scorer == null) { // There aren't any matches for this filter in this leaf return 0; } - return scorer.cost(); // TODO in another PR (please) change this to ScorerSupplier.cost + return scorer.cost(); } /** diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index dd9536f1dc105..098bcadf27e4b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -350,7 +350,7 @@ public static FromFilters adaptIntoFiltersOrNull( // We don't generate sensible Queries for nanoseconds. return null; } - if (false == FiltersAggregator.canUseFilterByFilter(parent, factories, null)) { + if (false == FiltersAggregator.canUseFilterByFilter(parent, null)) { return null; } boolean wholeNumbersOnly = false == ((ValuesSource.Numeric) valuesSourceConfig.getValuesSource()).isFloatingPoint(); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java index 8a705a47c5036..785cd3b0cdbe0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregatorFromFilters.java @@ -68,7 +68,7 @@ static StringTermsAggregatorFromFilters adaptIntoFiltersOrNull( if (false == valuesSourceConfig.alignesWithSearchIndex()) { return null; } - if (false == FiltersAggregator.canUseFilterByFilter(parent, factories, null)) { + if (false == FiltersAggregator.canUseFilterByFilter(parent, null)) { return null; } List> filters = new ArrayList<>(); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java index 32490c97060d0..d108473169f7b 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java @@ -13,6 +13,7 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.CollectionTerminatedException; @@ -26,6 +27,7 @@ import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.index.cache.bitset.BitsetFilterCache; import org.elasticsearch.index.mapper.CustomTermFreqField; import org.elasticsearch.index.mapper.DateFieldMapper; @@ -34,6 +36,8 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType; import 
org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType; import org.elasticsearch.index.mapper.ObjectMapper; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; @@ -49,6 +53,10 @@ import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter; import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregatorTests; +import org.elasticsearch.search.aggregations.metrics.InternalMax; +import org.elasticsearch.search.aggregations.metrics.InternalSum; +import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.SumAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; @@ -56,17 +64,23 @@ import org.junit.Before; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; +import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasEntry; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThan; import static org.mockito.Mockito.mock; public class FiltersAggregatorTests extends AggregatorTestCase { @@ -514,6 +528,217 @@ public void testTermQuery() throws IOException { }, ft); } + public void testSubAggs() throws IOException { + MappedFieldType dateFt = new DateFieldMapper.DateFieldType( + "test", + true, + false, + false, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null + ); + MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("int", NumberType.INTEGER); + AggregationBuilder builder = new FiltersAggregationBuilder( + "test", + new KeyedFilter("q1", new RangeQueryBuilder("test").from("2010-01-01").to("2010-03-01").includeUpper(false)), + new KeyedFilter("q2", new RangeQueryBuilder("test").from("2020-01-01").to("2020-03-01").includeUpper(false)) + ).subAggregation(new MaxAggregationBuilder("m").field("int")).subAggregation(new SumAggregationBuilder("s").field("int")); + List> docs = new ArrayList<>(); + docs.add( + List.of( + new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02")), + new SortedNumericDocValuesField("int", 100) + ) + ); + docs.add( + List.of( + new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02")), + new SortedNumericDocValuesField("int", 5) + ) + ); + docs.add( + List.of( + new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-03")), + new SortedNumericDocValuesField("int", 10) + ) + ); + /* + * Shuffle the docs so we collect them in a random order which causes + * bad implementations of filter-by-filter aggregation to fail with + * assertion errors while executing. 
+ */ + Collections.shuffle(docs, random()); + testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> { + InternalFilters filters = (InternalFilters) result; + assertThat(filters.getBuckets(), hasSize(2)); + + InternalFilters.InternalBucket b = filters.getBucketByKey("q1"); + assertThat(b.getDocCount(), equalTo(1L)); + InternalMax max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(100.0)); + InternalSum sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(100.0)); + + b = filters.getBucketByKey("q2"); + assertThat(b.getDocCount(), equalTo(2L)); + max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(10.0)); + sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(15.0)); + }, dateFt, intFt); + withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> { + assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); + FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator; + int maxDoc = searcher.getIndexReader().maxDoc(); + assertThat(filterByFilter.estimateCost(maxDoc), equalTo(3L)); + Map debug = new HashMap<>(); + filterByFilter.filters().get(0).collectDebugInfo(debug::put); + assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); + debug = new HashMap<>(); + filterByFilter.filters().get(1).collectDebugInfo(debug::put); + assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); + }, dateFt, intFt); + } + + public void testSubAggsManyDocs() throws IOException { + MappedFieldType dateFt = new DateFieldMapper.DateFieldType( + "test", + true, + false, + false, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null + ); + MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("int", NumberType.INTEGER); + AggregationBuilder builder = new FiltersAggregationBuilder( + "test", + new KeyedFilter("q1", new RangeQueryBuilder("test").from("2010-01-01").to("2010-03-01").includeUpper(false)), + new KeyedFilter("q2", new RangeQueryBuilder("test").from("2020-01-01").to("2020-03-01").includeUpper(false)) + ).subAggregation(new MaxAggregationBuilder("m").field("int")).subAggregation(new SumAggregationBuilder("s").field("int")); + List> docs = new ArrayList<>(); + long[] times = new long[] { + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02"), + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02"), + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-03"), + }; + for (int i = 0; i < 10000; i++) { + docs.add(List.of(new LongPoint("test", times[i % 3]), new SortedNumericDocValuesField("int", i))); + } + /* + * Shuffle the docs so we collect them in a random order which causes + * bad implementations of filter-by-filter aggregation to fail with + * assertion errors while executing. 
+ */ + Collections.shuffle(docs, random()); + testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> { + InternalFilters filters = (InternalFilters) result; + assertThat(filters.getBuckets(), hasSize(2)); + + InternalFilters.InternalBucket b = filters.getBucketByKey("q1"); + assertThat(b.getDocCount(), equalTo(3334L)); + InternalMax max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9999.0)); + InternalSum sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(16668333.0)); + + b = filters.getBucketByKey("q2"); + assertThat(b.getDocCount(), equalTo(6666L)); + max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9998.0)); + sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(33326667.0)); + }, dateFt, intFt); + withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> { + assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); + FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator; + int maxDoc = searcher.getIndexReader().maxDoc(); + assertThat(filterByFilter.estimateCost(maxDoc), both(greaterThanOrEqualTo(10000L)).and(lessThan(20000L))); + Map debug = new HashMap<>(); + filterByFilter.filters().get(0).collectDebugInfo(debug::put); + assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); + debug = new HashMap<>(); + filterByFilter.filters().get(1).collectDebugInfo(debug::put); + assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); + }, dateFt, intFt); + } + + public void testSubAggsManyFilters() throws IOException { + MappedFieldType dateFt = new DateFieldMapper.DateFieldType( + "test", + true, + false, + false, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null + ); + MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("int", NumberType.INTEGER); + List buckets = new ArrayList<>(); + DateFormatter formatter = DateFormatter.forPattern("strict_date"); + long start = formatter.parseMillis("2010-01-01"); + long lastRange = formatter.parseMillis("2020-03-01"); + while (start < lastRange) { + long end = start + TimeUnit.DAYS.toMillis(30); + String key = formatter.formatMillis(start) + " to " + formatter.formatMillis(end); + buckets.add(new KeyedFilter(key, new RangeQueryBuilder("test").from(start).to(end).includeUpper(false))); + start = end; + } + AggregationBuilder builder = new FiltersAggregationBuilder( + "test", + buckets.toArray(KeyedFilter[]::new) + ).subAggregation(new MaxAggregationBuilder("m").field("int")).subAggregation(new SumAggregationBuilder("s").field("int")); + List> docs = new ArrayList<>(); + long[] times = new long[] { + formatter.parseMillis("2010-01-02"), + formatter.parseMillis("2020-01-02"), + formatter.parseMillis("2020-01-03"), }; + for (int i = 0; i < 10000; i++) { + docs.add(List.of(new LongPoint("test", times[i % 3]), new SortedNumericDocValuesField("int", i))); + } + /* + * Shuffle the docs so we collect them in a random order which causes + * bad implementations of filter-by-filter aggregation to fail with + * assertion errors while executing. 
+ */ + Collections.shuffle(docs, random()); + testCase(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), result -> { + InternalFilters filters = (InternalFilters) result; + assertThat(filters.getBuckets(), hasSize(buckets.size())); + + InternalFilters.InternalBucket b = filters.getBucketByKey("2010-01-01 to 2010-01-31"); + assertThat(b.getDocCount(), equalTo(3334L)); + InternalMax max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9999.0)); + InternalSum sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(16668333.0)); + + b = filters.getBucketByKey("2019-12-10 to 2020-01-09"); + assertThat(b.getDocCount(), equalTo(6666L)); + max = b.getAggregations().get("m"); + assertThat(max.getValue(), equalTo(9998.0)); + sum = b.getAggregations().get("s"); + assertThat(sum.getValue(), equalTo(33326667.0)); + }, dateFt, intFt); + withAggregator(builder, new MatchAllDocsQuery(), iw -> iw.addDocuments(docs), (searcher, aggregator) -> { + assertThat(aggregator, instanceOf(FiltersAggregator.FilterByFilter.class)); + FiltersAggregator.FilterByFilter filterByFilter = (FiltersAggregator.FilterByFilter) aggregator; + int maxDoc = searcher.getIndexReader().maxDoc(); + assertThat(filterByFilter.estimateCost(maxDoc), both(greaterThanOrEqualTo(10000L)).and(lessThan(20000L))); + for (int b = 0; b < buckets.size(); b++) { + Map debug = new HashMap<>(); + filterByFilter.filters().get(0).collectDebugInfo(debug::put); + assertThat((int) debug.get("scorers_prepared_while_estimating_cost"), greaterThanOrEqualTo(1)); + } + }, dateFt, intFt); + } + + + @Override protected List objectMappers() { return MOCK_OBJECT_MAPPERS; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 6b3e14c34e119..90bbbcaed1adc 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.geo.GeoPoint; +import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.BigArrays; @@ -317,9 +318,9 @@ private List doc(MappedFieldType ft, String... 
values) { List doc = new ArrayList(); for (String v : values) { BytesRef bytes = new BytesRef(v); - doc.add(new SortedSetDocValuesField("string", bytes)); + doc.add(new SortedSetDocValuesField(ft.name(), bytes)); if (ft.isSearchable()) { - doc.add(new KeywordField("string", bytes, KeywordFieldMapper.Defaults.FIELD_TYPE)); + doc.add(new KeywordField(ft.name(), bytes, KeywordFieldMapper.Defaults.FIELD_TYPE)); } } return doc; @@ -645,7 +646,7 @@ public void testNumericIncludeExclude() throws Exception { } public void testStringTermsAggregator() throws Exception { - MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("field"); + MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("field", randomBoolean(), true, null); BiFunction> luceneFieldFactory = (val, mv) -> { List result = new ArrayList<>(2); if (mv) { @@ -660,8 +661,7 @@ public void testStringTermsAggregator() throws Exception { }; termsAggregator(ValueType.STRING, fieldType, i -> Integer.toString(i), String::compareTo, luceneFieldFactory); - termsAggregatorWithNestedMaxAgg(ValueType.STRING, fieldType, i -> Integer.toString(i), - val -> new SortedDocValuesField("field", new BytesRef(val))); + termsAggregatorWithNestedMaxAgg(ValueType.STRING, fieldType, i -> Integer.toString(i), val -> luceneFieldFactory.apply(val, false)); } public void testLongTermsAggregator() throws Exception { @@ -675,7 +675,7 @@ public void testLongTermsAggregator() throws Exception { MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.LONG); termsAggregator(ValueType.LONG, fieldType, Integer::longValue, Long::compareTo, luceneFieldFactory); - termsAggregatorWithNestedMaxAgg(ValueType.LONG, fieldType, Integer::longValue, val -> new NumericDocValuesField("field", val)); + termsAggregatorWithNestedMaxAgg(ValueType.LONG, fieldType, Integer::longValue, val -> luceneFieldFactory.apply(val, false)); } public void testDoubleTermsAggregator() throws Exception { @@ -690,7 +690,7 @@ public void testDoubleTermsAggregator() throws Exception { = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.DOUBLE); termsAggregator(ValueType.DOUBLE, fieldType, Integer::doubleValue, Double::compareTo, luceneFieldFactory); termsAggregatorWithNestedMaxAgg(ValueType.DOUBLE, fieldType, Integer::doubleValue, - val -> new NumericDocValuesField("field", Double.doubleToRawLongBits(val))); + val -> luceneFieldFactory.apply(val, false)); } public void testIpTermsAggregator() throws Exception { @@ -857,7 +857,7 @@ private void termsAggregator(ValueType valueType, MappedFieldType fieldType, private void termsAggregatorWithNestedMaxAgg(ValueType valueType, MappedFieldType fieldType, Function valueFactory, - Function luceneFieldFactory) throws Exception { + Function> luceneFieldFactory) throws Exception { final Map counts = new HashMap<>(); int numTerms = scaledRandomIntBetween(8, 128); for (int i = 0; i < numTerms; i++) { @@ -867,8 +867,8 @@ private void termsAggregatorWithNestedMaxAgg(ValueType valueType, MappedFiel try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { for (Map.Entry entry : counts.entrySet()) { - Document document = new Document(); - document.add(luceneFieldFactory.apply(entry.getKey())); + List document = new ArrayList<>(); + document.addAll(luceneFieldFactory.apply(entry.getKey())); document.add(new NumericDocValuesField("value", entry.getValue())); indexWriter.addDocument(document); } @@ -901,7 +901,7 
@@ private void termsAggregatorWithNestedMaxAgg(ValueType valueType, MappedFiel MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType("value", NumberFieldMapper.NumberType.LONG); - AggregationContext context = createAggregationContext(indexSearcher, null, fieldType, fieldType2); + AggregationContext context = createAggregationContext(indexSearcher, new MatchAllDocsQuery(), fieldType, fieldType2); Aggregator aggregator = createAggregator(aggregationBuilder, context); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); @@ -1091,19 +1091,21 @@ public void testIpField() throws Exception { } public void testNestedTermsAgg() throws Exception { + MappedFieldType fieldType1 = new KeywordFieldMapper.KeywordFieldType("field1", randomBoolean(), true, null); + MappedFieldType fieldType2 = new KeywordFieldMapper.KeywordFieldType("field2", randomBoolean(), true, null); try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { - Document document = new Document(); - document.add(new SortedDocValuesField("field1", new BytesRef("a"))); - document.add(new SortedDocValuesField("field2", new BytesRef("b"))); + List document = new ArrayList<>(); + document.addAll(doc(fieldType1, "a")); + document.addAll(doc(fieldType2, "b")); indexWriter.addDocument(document); - document = new Document(); - document.add(new SortedDocValuesField("field1", new BytesRef("c"))); - document.add(new SortedDocValuesField("field2", new BytesRef("d"))); + document = new ArrayList<>(); + document.addAll(doc(fieldType1, "c")); + document.addAll(doc(fieldType2, "d")); indexWriter.addDocument(document); - document = new Document(); - document.add(new SortedDocValuesField("field1", new BytesRef("e"))); - document.add(new SortedDocValuesField("field2", new BytesRef("f"))); + document = new ArrayList<>(); + document.addAll(doc(fieldType1, "e")); + document.addAll(doc(fieldType2, "f")); indexWriter.addDocument(document); try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { IndexSearcher indexSearcher = newIndexSearcher(indexReader); @@ -1121,10 +1123,7 @@ public void testNestedTermsAgg() throws Exception { .field("field2") .order(BucketOrder.key(true)) ); - MappedFieldType fieldType1 = new KeywordFieldMapper.KeywordFieldType("field1"); - MappedFieldType fieldType2 = new KeywordFieldMapper.KeywordFieldType("field2"); - - AggregationContext context = createAggregationContext(indexSearcher, null, fieldType1, fieldType2); + AggregationContext context = createAggregationContext(indexSearcher, new MatchAllDocsQuery(), fieldType1, fieldType2); Aggregator aggregator = createAggregator(aggregationBuilder, context); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); @@ -1309,12 +1308,14 @@ public void testWithNestedAggregations() throws IOException { } public void testHeisenpig() throws IOException { + MappedFieldType nestedFieldType = new NumberFieldMapper.NumberFieldType("number", NumberFieldMapper.NumberType.LONG); + KeywordFieldType animalFieldType = new KeywordFieldType("str", randomBoolean(), true, null); try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { String[] tags = new String[] {"danger", "fluffiness"}; - indexWriter.addDocuments(generateAnimalDocsWithNested("1", "sheep", tags, new int[] {1, 10})); - indexWriter.addDocuments(generateAnimalDocsWithNested("2", "cow", tags, new int[] {3, 1})); 
- indexWriter.addDocuments(generateAnimalDocsWithNested("3", "pig", tags, new int[] {100, 1})); + indexWriter.addDocuments(generateAnimalDocsWithNested("1", animalFieldType, "sheep", tags, new int[] {1, 10})); + indexWriter.addDocuments(generateAnimalDocsWithNested("2", animalFieldType, "cow", tags, new int[] {3, 1})); + indexWriter.addDocuments(generateAnimalDocsWithNested("3", animalFieldType, "pig", tags, new int[] {100, 1})); indexWriter.commit(); NestedAggregationBuilder nested = new NestedAggregationBuilder("nested", "nested_object") .subAggregation( @@ -1326,12 +1327,10 @@ public void testHeisenpig() throws IOException { .shardSize(10) .size(10) .order(BucketOrder.aggregation("nested>max_number", false)); - MappedFieldType nestedFieldType = new NumberFieldMapper.NumberFieldType("number", NumberFieldMapper.NumberType.LONG); - MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("str"); try (IndexReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { StringTerms result = searchAndReduce(newSearcher(indexReader, false, true), // match root document only - new DocValuesFieldExistsQuery(PRIMARY_TERM_NAME), terms, fieldType, nestedFieldType); + Queries.newNonNestedFilter(), terms, animalFieldType, nestedFieldType); assertThat(result.getBuckets().get(0).getKeyAsString(), equalTo("pig")); assertThat(result.getBuckets().get(0).docCount, equalTo(1L)); assertThat(((InternalMax) (((InternalNested)result.getBuckets().get(0).getAggregations().get("nested")) @@ -1404,15 +1403,19 @@ public void testThreeLayerStringViaMap() throws IOException { } private void threeLayerStringTestCase(String executionHint) throws IOException { + MappedFieldType ift = new KeywordFieldType("i", randomBoolean(), true, null); + MappedFieldType jft = new KeywordFieldType("j", randomBoolean(), true, null); + MappedFieldType kft = new KeywordFieldType("k", randomBoolean(), true, null); + try (Directory dir = newDirectory()) { try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) { for (int i = 0; i < 10; i++) { for (int j = 0; j < 10; j++) { for (int k = 0; k < 10; k++) { - Document d = new Document(); - d.add(new SortedDocValuesField("i", new BytesRef(Integer.toString(i)))); - d.add(new SortedDocValuesField("j", new BytesRef(Integer.toString(j)))); - d.add(new SortedDocValuesField("k", new BytesRef(Integer.toString(k)))); + List d = new ArrayList<>(); + d.addAll(doc(ift, Integer.toString(i))); + d.addAll(doc(jft, Integer.toString(j))); + d.addAll(doc(kft, Integer.toString(k))); writer.addDocument(d); } } @@ -1422,8 +1425,7 @@ private void threeLayerStringTestCase(String executionHint) throws IOException { TermsAggregationBuilder request = new TermsAggregationBuilder("i").field("i").executionHint(executionHint) .subAggregation(new TermsAggregationBuilder("j").field("j").executionHint(executionHint) .subAggregation(new TermsAggregationBuilder("k").field("k").executionHint(executionHint))); - StringTerms result = searchAndReduce(searcher, new MatchAllDocsQuery(), request, - keywordField("i"), keywordField("j"), keywordField("k")); + StringTerms result = searchAndReduce(searcher, new MatchAllDocsQuery(), request, ift, jft, kft); for (int i = 0; i < 10; i++) { StringTerms.Bucket iBucket = result.getBucketByKey(Integer.toString(i)); assertThat(iBucket.getDocCount(), equalTo(100L)); @@ -1707,11 +1709,17 @@ private List generateDocsWithNested(String id, int value, int[] nested return documents; } - private List generateAnimalDocsWithNested(String id, String animal, 
String[] tags, int[] nestedValues) { - List documents = new ArrayList<>(); + private List> generateAnimalDocsWithNested( + String id, + KeywordFieldType animalFieldType, + String animal, + String[] tags, + int[] nestedValues + ) { + List> documents = new ArrayList<>(); for (int i = 0; i < tags.length; i++) { - Document document = new Document(); + List document = new ArrayList<>(); document.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.NESTED_FIELD_TYPE)); document.add(new Field(NestedPathFieldMapper.NAME, "nested_object", NestedPathFieldMapper.Defaults.FIELD_TYPE)); @@ -1720,9 +1728,9 @@ private List generateAnimalDocsWithNested(String id, String animal, St documents.add(document); } - Document document = new Document(); + List document = new ArrayList<>(); document.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.FIELD_TYPE)); - document.add(new SortedDocValuesField("str", new BytesRef(animal))); + document.addAll(doc(animalFieldType, animal)); document.add(new Field(NestedPathFieldMapper.NAME, "docs", NestedPathFieldMapper.Defaults.FIELD_TYPE)); document.add(sequenceIDFields.primaryTerm); documents.add(document);
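
The collectSubs javadoc in the FiltersAggregator hunk above describes the per-filter collection strategy in prose. The standalone sketch below (not part of the patch; ToyFilter, ToyBucketCollector and PerFilterCollectionSketch are hypothetical names with no Lucene or Elasticsearch dependencies) illustrates the same flow under those assumptions: each filter is run to completion over a segment, every match is tagged with that filter's bucket ordinal, and the sub-aggregators are handed a fresh leaf collector between filters as though a new segment had started.

import java.util.List;
import java.util.function.Supplier;

public class PerFilterCollectionSketch {

    /** Stand-in for QueryToFilterAdapter#collect: the doc ids one filter matches, in ascending order. */
    interface ToyFilter {
        int[] matchingDocs();
    }

    /** Stand-in for LeafBucketCollector: receives (doc id, bucket ordinal) pairs. */
    interface ToyBucketCollector {
        void collect(int docId, long bucketOrd);
    }

    /** Run filter 0 to completion, then filter 1, and so on, with a fresh sub collector per filter. */
    static void collectFilterByFilter(List<ToyFilter> filters, Supplier<ToyBucketCollector> freshLeafCollector) {
        for (int filterOrd = 0; filterOrd < filters.size(); filterOrd++) {
            // Roughly plays the role of collectableSubAggregators.getLeafCollector(ctx) in the patch.
            ToyBucketCollector sub = freshLeafCollector.get();
            for (int doc : filters.get(filterOrd).matchingDocs()) {
                // Roughly plays the role of collectBucket(subCollector, docId, filterOrd).
                sub.collect(doc, filterOrd);
            }
        }
    }

    public static void main(String[] args) {
        List<ToyFilter> filters = List.of(() -> new int[] { 0, 2, 5 }, () -> new int[] { 1, 2 });
        collectFilterByFilter(filters, () -> (doc, ord) -> System.out.println("doc " + doc + " -> bucket " + ord));
    }
}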