opensearch-project · finnegancarroll · Jan 8, 2025 · Jan 14, 2025 · Jan 14, 2025 · Jan 14, 2025
@@ -104,6 +104,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Fix multi-value sort for unsigned long ([#16732](https://github.com/opensearch-project/OpenSearch/pull/16732))
 - The `phone-search` analyzer no longer emits the tel/sip prefix, international calling code, extension numbers and unformatted input as a token ([#16993](https://github.com/opensearch-project/OpenSearch/pull/16993))
 - Fix GRPC AUX_TRANSPORT_PORT and SETTING_GRPC_PORT settings and remove lingering HTTP terminology ([#17037](https://github.com/opensearch-project/OpenSearch/pull/17037))
+- Fix AutoDateHistogramAggregator rounding assertion failure ([#17023](https://github.com/opensearch-project/OpenSearch/pull/17023))
 
 ### Security
 

@@ -489,7 +489,7 @@ public double roundingSize(long utcMillis, DateTimeUnit timeUnit) {
      *
      * @opensearch.internal
      */
-    static class TimeUnitRounding extends Rounding {
+    public static class TimeUnitRounding extends Rounding {
         static final byte ID = 1;
 
         private final DateTimeUnit unit;
@@ -517,6 +517,10 @@ public byte id() {
             return ID;
         }
 
+        public ZoneId getTimeZone() { // accessor for the timeZone field, so callers can test it with isUTCTimeZone
+            return timeZone;
+        }
+
         private LocalDateTime truncateLocalDateTime(LocalDateTime localDateTime) {
             switch (unit) {
                 case SECOND_OF_MINUTE:
@@ -1382,16 +1386,8 @@ public static OptionalLong getInterval(Rounding rounding) {
 
         if (rounding instanceof TimeUnitRounding) {
             interval = (((TimeUnitRounding) rounding).unit).extraLocalOffsetLookup();
-            if (!isUTCTimeZone(((TimeUnitRounding) rounding).timeZone)) {
-                // Fast filter aggregation cannot be used if it needs time zone rounding
-                return OptionalLong.empty();
-            }
         } else if (rounding instanceof TimeIntervalRounding) {
             interval = ((TimeIntervalRounding) rounding).interval;
-            if (!isUTCTimeZone(((TimeIntervalRounding) rounding).timeZone)) {
-                // Fast filter aggregation cannot be used if it needs time zone rounding
-                return OptionalLong.empty();
-            }
         } else {
             return OptionalLong.empty();
         }
@@ -1403,7 +1399,7 @@ public static OptionalLong getInterval(Rounding rounding) {
      * Helper function for checking if the time zone requested for date histogram
      * aggregation is utc or not
      */
-    private static boolean isUTCTimeZone(final ZoneId zoneId) {
+    public static boolean isUTCTimeZone(final ZoneId zoneId) {
         return "Z".equals(zoneId.getDisplayName(TextStyle.FULL, Locale.ENGLISH));
     }
 }
@@ -286,6 +286,10 @@ public DateMathParser getDateMathParser() {
             return parser;
         }
 
+        public ZoneId getZoneId() { // the same zone that format(long) passes to atZone(...)
+            return timeZone;
+        }
+
         @Override
         public String format(long value) {
             return formatter.format(resolution.toInstant(value).atZone(timeZone));

@@ -82,6 +82,7 @@
 import org.opensearch.search.sort.SortAndFormats;
 
 import java.io.IOException;
+import java.time.ZoneId;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -93,6 +94,7 @@
 import java.util.function.LongUnaryOperator;
 import java.util.stream.Collectors;
 
+import static org.opensearch.common.Rounding.isUTCTimeZone;
 import static org.opensearch.search.aggregations.MultiBucketConsumerService.MAX_BUCKET_SETTING;
 import static org.opensearch.search.aggregations.bucket.filterrewrite.DateHistogramAggregatorBridge.segmentMatchAll;
 
@@ -182,6 +184,18 @@ protected boolean canOptimize() {
                         });
                     }
 
+                    /**
+                     * The filter rewrite optimized path only supports fixed bucket intervals.
+                     * For this reason we exclude non-UTC time zones.
+                     */
+                    Rounding round = this.valuesSource.getRounding();
+                    if (round instanceof Rounding.TimeUnitRounding) {
+                        ZoneId tz = ((Rounding.TimeUnitRounding) round).getTimeZone();
+                        if (!isUTCTimeZone(tz)) {
+                            return false;
+                        }
+                    }
+
                     // bucketOrds is used for saving the date histogram results got from the optimization path
                     bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), CardinalityUpperBound.ONE);
                     return true;

@@ -14,6 +14,7 @@
 import org.opensearch.common.Rounding;
 import org.opensearch.index.mapper.DateFieldMapper;
 import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.bucket.histogram.LongBounds;
 import org.opensearch.search.aggregations.support.ValuesSourceConfig;
 import org.opensearch.search.internal.SearchContext;
@@ -23,6 +24,7 @@
 import java.util.function.BiConsumer;
 import java.util.function.Function;
 
+import static org.opensearch.common.Rounding.isUTCTimeZone;
 import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse;
 
 /**
@@ -33,6 +35,14 @@ public abstract class DateHistogramAggregatorBridge extends AggregatorBridge {
     int maxRewriteFilters;
 
     protected boolean canOptimize(ValuesSourceConfig config) {
+        /**
+         * The filter rewrite optimized path only supports fixed bucket intervals.
+         * For this reason we exclude non-UTC time zones.
+         */
+        if (config.format() instanceof DocValueFormat.DateTime && !isUTCTimeZone(((DocValueFormat.DateTime) config.format()).getZoneId())) {
+            return false;
+        }
+
         if (config.script() == null && config.missing() == null) {
             MappedFieldType fieldType = config.fieldType();
             if (fieldType instanceof DateFieldMapper.DateFieldType) {

@@ -149,7 +149,6 @@ private AutoDateHistogramAggregator(
         Aggregator parent,
         Map<String, Object> metadata
     ) throws IOException {
-
         super(name, factories, aggregationContext, parent, metadata);
         this.targetBuckets = targetBuckets;
         // TODO: Remove null usage here, by using a different aggregator for create
@@ -170,14 +169,26 @@ protected void prepare() throws IOException {
                 buildRanges(context);
             }
 
+            /**
+             * The filter rewrite optimization uses this method to pre-emptively update the preparedRounding
+             * when considering the optimized path for a single segment. This is necessary since the optimized path
+             * skips doc collection entirely which is where the preparedRounding is normally updated.
+             *
+             * @param low lower bound of rounding to prepare
+             * @param high upper bound of rounding to prepare
+             * @return the selected rounding, which satisfies both conditions:
+             * 1. It is at least as large as our previously prepared rounding
+             * 2. It spans the range [low, high] with at most targetBuckets buckets
+             */
             @Override
             protected Rounding getRounding(final long low, final long high) {
                 // max - min / targetBuckets = bestDuration
                 // find the right innerInterval this bestDuration belongs to
                 // since we cannot exceed targetBuckets, bestDuration should go up,
                 // so the right innerInterval should be an upper bound
                 long bestDuration = (high - low) / targetBuckets;
-                // reset so this function is idempotent
+
+                int prevRoundingIdx = roundingIdx;
                 roundingIdx = 0;
                 while (roundingIdx < roundingInfos.length - 1) {
                     final RoundingInfo curRoundingInfo = roundingInfos[roundingIdx];
@@ -190,7 +201,12 @@ protected Rounding getRounding(final long low, final long high) {
                     roundingIdx++;
                 }
 
-                preparedRounding = prepareRounding(roundingIdx);
+                // Ensure preparedRounding never shrinks
+                roundingIdx = Math.max(prevRoundingIdx, roundingIdx);
+                if (roundingIdx != prevRoundingIdx) {
+                    preparedRounding = prepareRounding(roundingIdx);
+                }
+
                 return roundingInfos[roundingIdx].rounding;
             }
 
@@ -403,12 +419,39 @@ private void collectValue(int doc, long rounded) throws IOException {
                     increaseRoundingIfNeeded(rounded);
                 }
 
+                /**
+                 * Examine our current bucket count and the most recently added bucket to determine if an update to
+                 * preparedRounding is required to keep total bucket count in compliance with targetBuckets.
+                 *
+                 * @param rounded the most recently collected value rounded
+                 */
                 private void increaseRoundingIfNeeded(long rounded) {
+                    // If we are already using the rounding with the largest interval nothing can be done
                     if (roundingIdx >= roundingInfos.length - 1) {
                         return;
                     }
+
+                    // Recalculate the min and max values we expect to bucket, including the most recently rounded value
                     min = Math.min(min, rounded);
                     max = Math.max(max, rounded);
+
+                    /**
+                     * Quick explanation of the two conditions below:
+                     *
+                     * 1. [targetBuckets * roundingInfos[roundingIdx].getMaximumInnerInterval()]
+                     * Represents the largest bucket count we can hold before we would exceed targetBuckets
+                     * even if we use the maximum inner interval of our current rounding. For example, consider the
+                     * DAYS_OF_MONTH rounding where the maximum inner interval is 7 days (i.e. 1 week buckets).
+                     * targetBuckets * roundingInfos[roundingIdx].getMaximumInnerInterval() would then be the largest
+                     * number of 1 day buckets which can still be re-bucketed into 1 week buckets without exceeding
+                     * our targetBuckets limit. If the current count of buckets exceeds this limit we must update
+                     * our rounding.
+                     *
+                     * 2. [targetBuckets * roundingInfos[roundingIdx].getMaximumRoughEstimateDurationMillis()]
+                     * The total duration in milliseconds covered by our current rounding. In the case of MINUTES_OF_HOUR
+                     * rounding getMaximumRoughEstimateDurationMillis is 60000. If our current total range in millis
+                     * (max - min) exceeds this range we must update our rounding.
+                     */
                     if (bucketOrds.size() <= targetBuckets * roundingInfos[roundingIdx].getMaximumInnerInterval()
                         && max - min <= targetBuckets * roundingInfos[roundingIdx].getMaximumRoughEstimateDurationMillis()) {
                         return;

@@ -38,7 +38,9 @@
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
@@ -72,6 +74,7 @@
 import java.time.Instant;
 import java.time.LocalDate;
 import java.time.YearMonth;
+import java.time.ZoneId;
 import java.time.ZoneOffset;
 import java.time.ZonedDateTime;
 import java.util.ArrayList;
@@ -912,6 +915,58 @@ public void testWithPipelineReductions() throws IOException {
         );
     }
 
+    // Regression test for https://github.com/opensearch-project/OpenSearch/issues/16932
+    public void testFilterRewriteWithTZRoundingRangeAssert() throws IOException {
+        /*
+        The multiBucketIndexData range must overlap a DST change to produce a 'LinkedListLookup' prepared rounding.
+        That lookup-style rounding maintains a strict min/max input range and asserts that each value lies within it.
+         */
+        final List<ZonedDateTime> multiBucketIndexData = Arrays.asList(
+            ZonedDateTime.of(2023, 10, 10, 0, 0, 0, 0, ZoneOffset.UTC),
+            ZonedDateTime.of(2023, 11, 11, 0, 0, 0, 0, ZoneOffset.UTC)
+        );
+
+        final List<ZonedDateTime> singleBucketIndexData = Arrays.asList(ZonedDateTime.of(2023, 12, 27, 0, 0, 0, 0, ZoneOffset.UTC));
+
+        try (Directory directory = newDirectory()) {
+            /*
+            Disable merging and flush between the two batches so that one shard holds two segments: the documents in
+            seg 1 are then out of range of the rounding prepared while the filter rewrite considers seg 2 for the optimized path.
+            */
+            IndexWriterConfig c = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+            try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, c)) {
+                indexSampleData(multiBucketIndexData, indexWriter);
+                indexWriter.flush();
+                indexSampleData(singleBucketIndexData, indexWriter);
+            }
+
+            try (IndexReader indexReader = DirectoryReader.open(directory)) {
+                final IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
+
+                // A target of 3 buckets forces the agg to update its rounding when it begins collecting the second segment.
+                final AutoDateHistogramAggregationBuilder aggregationBuilder = new AutoDateHistogramAggregationBuilder("_name");
+                aggregationBuilder.setNumBuckets(3).field(DATE_FIELD).timeZone(ZoneId.of("America/New_York"));
+
+                Map<String, Integer> expectedDocCount = new TreeMap<>();
+                expectedDocCount.put("2023-10-01T00:00:00.000-04:00", 1);
+                expectedDocCount.put("2023-11-01T00:00:00.000-04:00", 1);
+                expectedDocCount.put("2023-12-01T00:00:00.000-05:00", 1);
+
+                final InternalAutoDateHistogram histogram = searchAndReduce(
+                    indexSearcher,
+                    DEFAULT_QUERY,
+                    aggregationBuilder,
+                    false, // shardFanOut
+                    new DateFieldMapper.DateFieldType(aggregationBuilder.field()),
+                    new NumberFieldMapper.NumberFieldType(INSTANT_FIELD, NumberFieldMapper.NumberType.LONG),
+                    new NumberFieldMapper.NumberFieldType(NUMERIC_FIELD, NumberFieldMapper.NumberType.LONG)
+                );
+
+                assertThat(bucketCountsAsMap(histogram), equalTo(expectedDocCount));
+            }
+        }
+    }
+
     @Override
     protected IndexSettings createIndexSettings() {
         final Settings nodeSettings = Settings.builder().put("search.max_buckets", 25000).build();

@@ -609,9 +609,19 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         IndexSearcher searcher,
         Query query,
         AggregationBuilder builder,
+        boolean shardFanOut,
         MappedFieldType... fieldTypes
     ) throws IOException {
-        return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, fieldTypes);
+        return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, shardFanOut, fieldTypes);
+    }
+
+    protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduce(
+        IndexSearcher searcher,
+        Query query,
+        AggregationBuilder builder,
+        MappedFieldType... fieldTypes
+    ) throws IOException { // overload without shardFanOut: the flag is chosen by randomBoolean()
+        return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, randomBoolean(), fieldTypes);
     }
 
     protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduce(
@@ -621,7 +631,7 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         AggregationBuilder builder,
         MappedFieldType... fieldTypes
     ) throws IOException {
-        return searchAndReduce(indexSettings, searcher, query, builder, DEFAULT_MAX_BUCKETS, fieldTypes);
+        return searchAndReduce(indexSettings, searcher, query, builder, DEFAULT_MAX_BUCKETS, randomBoolean(), fieldTypes);
     }
 
     protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduce(
@@ -631,7 +641,7 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         int maxBucket,
         MappedFieldType... fieldTypes
     ) throws IOException {
-        return searchAndReduce(createIndexSettings(), searcher, query, builder, maxBucket, fieldTypes);
+        return searchAndReduce(createIndexSettings(), searcher, query, builder, maxBucket, randomBoolean(), fieldTypes);
     }
 
     protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduce(
@@ -640,9 +650,10 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         Query query,
         AggregationBuilder builder,
         int maxBucket,
+        boolean shardFanOut,
         MappedFieldType... fieldTypes
     ) throws IOException {
-        return searchAndReduce(indexSettings, searcher, query, builder, maxBucket, false, fieldTypes);
+        return searchAndReduce(indexSettings, searcher, query, builder, maxBucket, false, shardFanOut, fieldTypes);
     }
 
     /**
@@ -660,6 +671,7 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         AggregationBuilder builder,
         int maxBucket,
         boolean hasNested,
+        boolean shardFanOut,
         MappedFieldType... fieldTypes
     ) throws IOException {
         final IndexReaderContext ctx = searcher.getTopReaderContext();
@@ -675,7 +687,7 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         );
         C root = createAggregator(query, builder, searcher, bucketConsumer, fieldTypes);
 
-        if (randomBoolean() && searcher.getIndexReader().leaves().size() > 0) {
+        if (shardFanOut && searcher.getIndexReader().leaves().size() > 0) {
             assertThat(ctx, instanceOf(CompositeReaderContext.class));
             final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
             final int size = compCTX.leaves().size();