Added support for sorting buckets based on sub aggregations

Supports sorting on sub-aggs down the current hierarchy. This is supported as long as the aggregation in the specified order path are of a single-bucket type, where the last aggregation in the path points to either a single-bucket aggregation or a metrics one. If it's a single-bucket aggregation, the sort will be applied on the document count in the bucket (i.e. doc_count), and if it is a metrics type, the sort will be applied on the pointed out metric (in case of a single-metric aggregations, such as avg, the sort will be applied on the single metric value) NOTE: this commit adds a constraint on what should be considered a valid aggregation name. Aggregations names must be alpha-numeric and may contain '-' and '_'. Closes elastic#5253
javanna · Mar 5, 2014 · 9d0fc76 · 9d0fc76
1 parent b723ee0
commit 9d0fc76
Show file tree

Hide file tree

Showing 24 changed files with 1,011 additions and 172 deletions.
diff --git a/docs/reference/search/aggregations/bucket/histogram-aggregation.asciidoc b/docs/reference/search/aggregations/bucket/histogram-aggregation.asciidoc
@@ -175,6 +175,48 @@ If the histogram aggregation has a direct metrics sub-aggregation, the latter ca
 
 <2> There is no need to configure the `price` field for the `price_stats` aggregation as it will inherit it by default from its parent histogram aggregation.
 
+It is also possible to order the buckets based on a "deeper" aggregation in the hierarchy. This is supported as long
+as the aggregations path are of a single-bucket type, where the last aggregation in the path may either by a single-bucket
+one or a metrics one. If it's a single-bucket type, the order will be defined by the number of docs in the bucket (i.e. `doc_count`),
+in case it's a metrics one, the same rules as above apply (where the path must indicate the metric name to sort by in case of
+a multi-value metrics aggregation, and in case of a single-value metrics aggregation the sort will be applied on that value).
+
+The path must be defined in the following form:
+
+--------------------------------------------------
+AGG_SEPARATOR       :=  '>'
+METRIC_SEPARATOR    :=  '.'
+AGG_NAME            :=  <the name of the aggregation>
+METRIC              :=  <the name of the metric (in case of multi-value metrics aggregation)>
+PATH                :=  <AGG_NAME>[<AGG_SEPARATOR><AGG_NAME>]*[<METRIC_SEPARATOR><METRIC>]
+--------------------------------------------------
+
+[source,js]
+--------------------------------------------------
+{
+    "aggs" : {
+        "prices" : {
+            "histogram" : {
+                "field" : "price",
+                "interval" : 50,
+                "order" : { "promoted_products>rating_stats.avg" : "desc" } <1>
+            },
+            "aggs" : {
+                "promoted_products" : {
+                    "filter" : { "term" : { "promoted" : true }},
+                    "aggs" : {
+                        "rating_stats" : { "stats" : { "field" : "rating" }}
+                    }
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+
+The above will sort the buckets based on the avg rating among the promoted products
+
+
 ==== Minimum document count
 
 It is possible to only return buckets that have a document count that is greater than or equal to a configured

diff --git a/docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc b/docs/reference/search/aggregations/bucket/terms-aggregation.asciidoc
@@ -144,6 +144,46 @@ Ordering the buckets by multi value metrics sub-aggregation (identified by the a
 }
 --------------------------------------------------
 
+It is also possible to order the buckets based on a "deeper" aggregation in the hierarchy. This is supported as long
+as the aggregations path are of a single-bucket type, where the last aggregation in the path may either by a single-bucket
+one or a metrics one. If it's a single-bucket type, the order will be defined by the number of docs in the bucket (i.e. `doc_count`),
+in case it's a metrics one, the same rules as above apply (where the path must indicate the metric name to sort by in case of
+a multi-value metrics aggregation, and in case of a single-value metrics aggregation the sort will be applied on that value).
+
+The path must be defined in the following form:
+
+--------------------------------------------------
+AGG_SEPARATOR       :=  '>'
+METRIC_SEPARATOR    :=  '.'
+AGG_NAME            :=  <the name of the aggregation>
+METRIC              :=  <the name of the metric (in case of multi-value metrics aggregation)>
+PATH                :=  <AGG_NAME>[<AGG_SEPARATOR><AGG_NAME>]*[<METRIC_SEPARATOR><METRIC>]
+--------------------------------------------------
+
+[source,js]
+--------------------------------------------------
+{
+    "aggs" : {
+        "countries" : {
+            "terms" : {
+                "field" : "address.country",
+                "order" : { "females>height_stats.avg" : "desc" }
+            },
+            "aggs" : {
+                "females" : {
+                    "filter" : { "term" : { "gender" : { "female" }}},
+                    "aggs" : {
+                        "height_stats" : { "stats" : { "field" : "height" }}
+                    }
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+
+The above will sort the countries buckets based on the average height among the female population.
+
 ==== Minimum document count
 
 It is possible to only return terms that match more than a configured number of hits using the `min_doc_count` option:

diff --git a/src/main/java/org/elasticsearch/common/util/AbstractHash.java b/src/main/java/org/elasticsearch/common/util/AbstractHash.java
@@ -77,7 +77,7 @@ static long nextSlot(long curSlot, long mask) {
     }
 
     /**
-     * Get the id associated with key at <code>0 &lte; index &lte; capacity()</code> or -1 if this slot is unused.
+     * Get the id associated with key at <code>0 &lt;= index &lt;= capacity()</code> or -1 if this slot is unused.
      */
     public long id(long index) {
         return ids.get(index) - 1;

diff --git a/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java b/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java
@@ -28,7 +28,9 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 public abstract class Aggregator implements Releasable, ReaderContextAware {
 
@@ -59,6 +61,8 @@ public static enum BucketAggregationMode {
     protected final AggregatorFactories factories;
     protected final Aggregator[] subAggregators;
 
+    private Map<String, Aggregator> subAggregatorbyName;
+
     /**
      * Constructs a new Aggregator.
      *
@@ -113,6 +117,16 @@ public Aggregator[] subAggregators() {
         return subAggregators;
     }
 
+    public Aggregator subAggregator(String aggName) {
+        if (subAggregatorbyName == null) {
+            subAggregatorbyName = new HashMap<String, Aggregator>(subAggregators.length);
+            for (int i = 0; i < subAggregators.length; i++) {
+                subAggregatorbyName.put(subAggregators[i].name, subAggregators[i]);
+            }
+        }
+        return subAggregatorbyName.get(aggName);
+    }
+
     /**
      * @return  The current aggregation context.
      */

diff --git a/src/main/java/org/elasticsearch/search/aggregations/AggregatorParsers.java b/src/main/java/org/elasticsearch/search/aggregations/AggregatorParsers.java
@@ -27,14 +27,18 @@
 
 import java.io.IOException;
 import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * A registry for all the aggregator parser, also servers as the main parser for the aggregations module
  */
 public class AggregatorParsers {
 
+    public static final Pattern VALID_AGG_NAME = Pattern.compile("[a-zA-Z0-9\\-_]+");
     private final ImmutableMap<String, Aggregator.Parser> parsers;
 
+
     /**
      * Constructs the AggregatorParsers out of all the given parsers
      *
@@ -78,6 +82,8 @@ private AggregatorFactories parseAggregators(XContentParser parser, SearchContex
         XContentParser.Token token = null;
         String currentFieldName = null;
 
+        Matcher validAggMatcher = VALID_AGG_NAME.matcher("");
+
         AggregatorFactories.Builder factories = new AggregatorFactories.Builder();
 
         String aggregationName = null;
@@ -102,6 +108,9 @@ private AggregatorFactories parseAggregators(XContentParser parser, SearchContex
                             if (aggregatorParser == null) {
                                 throw new SearchParseException(context, "Could not find aggregator type [" + currentFieldName + "]");
                             }
+                            if (!validAggMatcher.reset(aggregationName).matches()) {
+                                throw new SearchParseException(context, "Invalid aggregation name [" + aggregationName + "]. Aggregation names must be alpha-numeric and can only contain '_' and '-'");
+                            }
                             factory = aggregatorParser.parse(aggregationName, parser, context);
                         }
                     }

diff --git a/src/main/java/org/elasticsearch/search/aggregations/HasAggregations.java b/src/main/java/org/elasticsearch/search/aggregations/HasAggregations.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations;
+
+/**
+ *
+ */
+public interface HasAggregations {
+
+    Aggregations getAggregations();
+
+}
diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java
@@ -84,7 +84,7 @@ protected final void incrementBucketDocCount(int inc, long bucketOrd) throws IOE
     /**
      * Utility method to return the number of documents that fell in the given bucket (identified by the bucket ordinal)
      */
-    protected final long bucketDocCount(long bucketOrd) {
+    public final long bucketDocCount(long bucketOrd) {
         if (bucketOrd >= docCounts.size()) {
             // This may happen eg. if no document in the highest buckets is accepted by a sub aggregator.
             // For example, if there is a long terms agg on 3 terms 1,2,3 with a sub filter aggregator and if no document with 3 as a value

diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/MultiBucketsAggregation.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/MultiBucketsAggregation.java
@@ -19,12 +19,12 @@
 
 package org.elasticsearch.search.aggregations.bucket;
 
-import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.common.util.Comparators;
 import org.elasticsearch.search.aggregations.Aggregation;
 import org.elasticsearch.search.aggregations.Aggregations;
-import org.elasticsearch.search.aggregations.metrics.MetricsAggregation;
+import org.elasticsearch.search.aggregations.HasAggregations;
+import org.elasticsearch.search.aggregations.support.OrderPath;
 
 import java.util.Collection;
 
@@ -38,7 +38,7 @@ public interface MultiBucketsAggregation extends Aggregation {
      * A bucket represents a criteria to which all documents that fall in it adhere to. It is also uniquely identified
      * by a key, and can potentially hold sub-aggregations computed over all documents in it.
      */
-    public interface Bucket {
+    public interface Bucket extends HasAggregations {
 
         /**
          * @return  The key associated with the bucket as a string
@@ -62,66 +62,28 @@ public interface Bucket {
 
         static class SubAggregationComparator<B extends Bucket> implements java.util.Comparator<B> {
 
-            private final String aggName;
-            private final String valueName;
+            private final OrderPath path;
             private final boolean asc;
 
             public SubAggregationComparator(String expression, boolean asc) {
                 this.asc = asc;
-                int i = expression.indexOf('.');
-                if (i < 0) {
-                    this.aggName = expression;
-                    this.valueName = null;
-                } else {
-                    this.aggName = expression.substring(0, i);
-                    this.valueName = expression.substring(i+1);
-                }
-            }
-
-            public SubAggregationComparator(String aggName, String valueName, boolean asc) {
-                this.aggName = aggName;
-                this.valueName = valueName;
-                this.asc = asc;
+                this.path = OrderPath.parse(expression);
             }
 
             public boolean asc() {
                 return asc;
             }
 
-            public String aggName() {
-                return aggName;
-            }
-
-            public String valueName() {
-                return valueName;
+            public OrderPath path() {
+                return path;
             }
 
             @Override
             public int compare(B b1, B b2) {
-                double v1 = value(b1);
-                double v2 = value(b2);
+                double v1 = path.resolveValue(b1);
+                double v2 = path.resolveValue(b2);
                 return Comparators.compareDiscardNaN(v1, v2, asc);
             }
-
-            private double value(B bucket) {
-                MetricsAggregation aggregation = bucket.getAggregations().get(aggName);
-                if (aggregation == null) {
-                    throw new ElasticsearchIllegalArgumentException("Unknown aggregation named [" + aggName + "]");
-                }
-                if (aggregation instanceof MetricsAggregation.SingleValue) {
-                    //TODO should we throw an exception if the value name is specified?
-                    return ((MetricsAggregation.SingleValue) aggregation).value();
-                }
-                if (aggregation instanceof MetricsAggregation.MultiValue) {
-                    if (valueName == null) {
-                        throw new ElasticsearchIllegalArgumentException("Cannot sort on multi valued aggregation [" + aggName + "]. A value name is required");
-                    }
-                    return ((MetricsAggregation.MultiValue) aggregation).value(valueName);
-                }
-
-                throw new ElasticsearchIllegalArgumentException("A mal attempt to sort terms by aggregation [" + aggregation.getName() +
-                        "]. Terms can only be ordered by either standard order or direct calc aggregators of the terms");
-            }
         }
     }
 

diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/SingleBucketAggregation.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/SingleBucketAggregation.java
@@ -21,11 +21,12 @@
 
 import org.elasticsearch.search.aggregations.Aggregation;
 import org.elasticsearch.search.aggregations.Aggregations;
+import org.elasticsearch.search.aggregations.HasAggregations;
 
 /**
  * A single bucket aggregation
  */
-public interface SingleBucketAggregation extends Aggregation {
+public interface SingleBucketAggregation extends Aggregation, HasAggregations {
 
     /**
      * @return  The number of documents in this bucket

diff --git a/...main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramParser.java b/...main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramParser.java
@@ -238,11 +238,7 @@ private static InternalOrder resolveOrder(String key, boolean asc) {
         if ("_count".equals(key)) {
             return (InternalOrder) (asc ? InternalOrder.COUNT_ASC : InternalOrder.COUNT_DESC);
         }
-        int i = key.indexOf('.');
-        if (i < 0) {
-            return new InternalOrder.Aggregation(key, null, asc);
-        }
-        return new InternalOrder.Aggregation(key.substring(0, i), key.substring(i + 1), asc);
+        return new InternalOrder.Aggregation(key, asc);
     }
 
     private long parseOffset(String offset) throws IOException {

diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/Histogram.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/Histogram.java
@@ -20,7 +20,6 @@
 
 import com.google.common.primitives.Longs;
 import org.elasticsearch.common.xcontent.ToXContent;
-import org.elasticsearch.search.aggregations.Aggregation;
 import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
 
 import java.util.Collection;
@@ -110,11 +109,11 @@ public int compare(InternalHistogram.Bucket b1, InternalHistogram.Bucket b2) {
         /**
          * Creates a bucket ordering strategy that sorts buckets based on a single-valued calc sug-aggregation
          *
-         * @param aggregationName the name of the aggregation
+         * @param path the name of the aggregation
          * @param asc             The direction of the order (ascending or descending)
          */
-        public static Order aggregation(String aggregationName, boolean asc) {
-            return new InternalOrder.Aggregation(aggregationName, null, asc);
+        public static Order aggregation(String path, boolean asc) {
+            return new InternalOrder.Aggregation(path, asc);
         }
 
         /**
@@ -125,7 +124,7 @@ public static Order aggregation(String aggregationName, boolean asc) {
          * @param asc             The direction of the order (ascending or descending)
          */
         public static Order aggregation(String aggregationName, String valueName, boolean asc) {
-            return new InternalOrder.Aggregation(aggregationName, valueName, asc);
+            return new InternalOrder.Aggregation(aggregationName + "." + valueName, asc);
         }
 
         /**

diff --git a/...main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregator.java b/...main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregator.java
@@ -23,12 +23,12 @@
 import org.elasticsearch.common.inject.internal.Nullable;
 import org.elasticsearch.common.lease.Releasables;
 import org.elasticsearch.common.rounding.Rounding;
+import org.elasticsearch.common.util.LongHash;
 import org.elasticsearch.index.fielddata.LongValues;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
-import org.elasticsearch.common.util.LongHash;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 import org.elasticsearch.search.aggregations.support.ValueSourceAggregatorFactory;
 import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;

diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramParser.java
@@ -159,10 +159,6 @@ static InternalOrder resolveOrder(String key, boolean asc) {
         if ("_count".equals(key)) {
             return (InternalOrder) (asc ? InternalOrder.COUNT_ASC : InternalOrder.COUNT_DESC);
         }
-        int i = key.indexOf('.');
-        if (i < 0) {
-            return new InternalOrder.Aggregation(key, null, asc);
-        }
-        return new InternalOrder.Aggregation(key.substring(0, i), key.substring(i + 1), asc);
+        return new InternalOrder.Aggregation(key, asc);
     }
 }