Skip to content

Commit

Permalink
Fail variable_width_histogram that collects from many (#58619) (#58780)
Browse files Browse the repository at this point in the history
Adds an explicit check to `variable_width_histogram` to stop it from
trying to collect from many buckets because it can't. I tried to make it
do so but that is more than an afternoon's project, sadly. So for now we
just disallow it.

Relates to #42035
  • Loading branch information
nik9000 authored Jun 30, 2020
1 parent 3aa08fb commit 40850a7
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ Response:
--------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]

IMPORTANT: This aggregation cannot currently be nested under any aggregation that collects from more than a single bucket.

==== Clustering Algorithm
Each shard fetches the first `initial_buffer` documents and stores them in memory. Once the buffer is full, these documents
are sorted and linearly separated into `3/4 * shard_size buckets`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ protected Aggregator doCreateInternal(SearchContext searchContext,
Aggregator parent,
boolean collectsFromSingleBucket,
Map<String, Object> metadata) throws IOException{
if (collectsFromSingleBucket == false) {
throw new IllegalArgumentException(
"["
+ VariableWidthHistogramAggregationBuilder.NAME
+ "] cannot be nested inside an aggregation that collects more than a single bucket."
);
}
AggregatorSupplier aggregatorSupplier = queryShardContext.getValuesSourceRegistry().getAggregator(config,
VariableWidthHistogramAggregationBuilder.NAME);
if (aggregatorSupplier instanceof VariableWidthHistogramAggregatorSupplier == false) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,16 @@
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms;
import org.elasticsearch.search.aggregations.bucket.terms.LongTerms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.InternalStats;
import org.elasticsearch.search.aggregations.metrics.StatsAggregationBuilder;
Expand All @@ -51,12 +54,15 @@
import java.util.Map;
import java.util.function.Consumer;

import static java.util.stream.Collectors.toList;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;

public class VariableWidthHistogramAggregatorTests extends AggregatorTestCase {

private static final String NUMERIC_FIELD = "numeric";

private static final Query DEFAULT_QUERY = new MatchAllDocsQuery();
private VariableWidthHistogramAggregationBuilder aggregationBuilder;

public void testNoDocs() throws Exception{
final List<Number> dataset = Arrays.asList();
Expand Down Expand Up @@ -424,6 +430,54 @@ public void testMultipleSegments() throws IOException{

}

public void testAsSubAggregation() throws IOException {
AggregationBuilder builder = new TermsAggregationBuilder("t").field("t")
.subAggregation(new VariableWidthHistogramAggregationBuilder("v").field("v").setNumBuckets(2));
CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
iw.addDocument(
org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 1))
);
iw.addDocument(
org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 10))
);
iw.addDocument(
org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 11))
);

iw.addDocument(
org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 2), new SortedNumericDocValuesField("v", 20))
);
iw.addDocument(
org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 2), new SortedNumericDocValuesField("v", 30))
);
};
Consumer<LongTerms> verify = terms -> {
/*
* This is what the result should be but it never gets called because of
* the explicit check. We do expect to remove the check in the future,
* thus, this stays.
*/
LongTerms.Bucket t1 = terms.getBucketByKey("1");
InternalVariableWidthHistogram v1 = t1.getAggregations().get("v");
assertThat(
v1.getBuckets().stream().map(InternalVariableWidthHistogram.Bucket::centroid).collect(toList()),
equalTo(org.elasticsearch.common.collect.List.of(1.0, 10.5))
);

LongTerms.Bucket t2 = terms.getBucketByKey("1");
InternalVariableWidthHistogram v2 = t2.getAggregations().get("v");
assertThat(
v2.getBuckets().stream().map(InternalVariableWidthHistogram.Bucket::centroid).collect(toList()),
equalTo(org.elasticsearch.common.collect.List.of(20.0, 30))
);
};
Exception e = expectThrows(
IllegalArgumentException.class,
() -> testCase(builder, DEFAULT_QUERY, buildIndex, verify, longField("t"), longField("v"))
);
assertThat(e.getMessage(), containsString("cannot be nested"));
}


private void testSearchCase(final Query query, final List<Number> dataset, boolean multipleSegments,
final Consumer<VariableWidthHistogramAggregationBuilder> configure,
Expand Down

0 comments on commit 40850a7

Please sign in to comment.