From 59285c02385bb13bf98f710c6e683b67a2af041c Mon Sep 17 00:00:00 2001 From: gashutos Date: Thu, 8 Jun 2023 19:32:31 +0530 Subject: [PATCH 1/4] Revert "Revert "Time series based workload desc order optimization through reverse segment read (#7244)" (#7892)" This reverts commit bb265369d67431a7aa15efca8734326857db7e32. Signed-off-by: gashutos --- .../opensearch.release-notes-2.8.0.md | 1 + .../cluster/metadata/DataStream.java | 23 +++++++++++++ .../org/opensearch/index/IndexSettings.java | 12 +++++++ .../opensearch/index/engine/EngineConfig.java | 19 +++++++++++ .../index/engine/EngineConfigFactory.java | 6 +++- .../index/engine/InternalEngine.java | 3 ++ .../index/mapper/MappingLookup.java | 10 ++++++ .../opensearch/index/shard/IndexShard.java | 16 +++++++++- .../search/internal/ContextIndexSearcher.java | 32 +++++++++++++++++-- .../engine/EngineConfigFactoryTests.java | 6 ++-- .../test/OpenSearchIntegTestCase.java | 1 + 11 files changed, 123 insertions(+), 6 deletions(-) diff --git a/release-notes/opensearch.release-notes-2.8.0.md b/release-notes/opensearch.release-notes-2.8.0.md index 556e1f0047595..4f7dc38997e1e 100644 --- a/release-notes/opensearch.release-notes-2.8.0.md +++ b/release-notes/opensearch.release-notes-2.8.0.md @@ -10,6 +10,7 @@ - [Search Pipelines] Add RenameFieldResponseProcessor for Search Pipelines ([#7377](https://github.com/opensearch-project/OpenSearch/pull/7377)) - [Search Pipelines] Split search pipeline processor factories by type ([#7597](https://github.com/opensearch-project/OpenSearch/pull/7597)) - [Search Pipelines] Add script processor ([#7607](https://github.com/opensearch-project/OpenSearch/pull/7607)) +- Add descending order search optimization through reverse segment read. ([#7244](https://github.com/opensearch-project/OpenSearch/pull/7244)) - Add 'unsigned_long' numeric field type ([#6237](https://github.com/opensearch-project/OpenSearch/pull/6237)) - Add back primary shard preference for queries ([#7375](https://github.com/opensearch-project/OpenSearch/pull/7375)) - Add task cancellation timestamp in task API ([#7455](https://github.com/opensearch-project/OpenSearch/pull/7455)) diff --git a/server/src/main/java/org/opensearch/cluster/metadata/DataStream.java b/server/src/main/java/org/opensearch/cluster/metadata/DataStream.java index 825aaee1ad1f8..f4be1cfff489c 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/DataStream.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/DataStream.java @@ -31,6 +31,10 @@ package org.opensearch.cluster.metadata; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.PointValues; +import org.opensearch.OpenSearchException; import org.opensearch.cluster.AbstractDiffable; import org.opensearch.cluster.Diff; import org.opensearch.core.ParseField; @@ -46,6 +50,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.Locale; import java.util.Map; @@ -59,6 +64,24 @@ public final class DataStream extends AbstractDiffable implements ToXContentObject { public static final String BACKING_INDEX_PREFIX = ".ds-"; + public static final String TIMESERIES_FIELDNAME = "@timestamp"; + public static final Comparator TIMESERIES_LEAF_SORTER = Comparator.comparingLong((LeafReader r) -> { + try { + PointValues points = r.getPointValues(TIMESERIES_FIELDNAME); + if (points != null) { + // could be a multipoint (probably not) but get the maximum time value anyway + byte[] sortValue = points.getMaxPackedValue(); + // decode the first dimension because this should not be a multi dimension field + // it's a bug in the date field if it is + return LongPoint.decodeDimension(sortValue, 0); + } else { + // segment does not have a timestamp field, just return the minimum value + return Long.MIN_VALUE; + } + } catch (IOException e) { + throw new OpenSearchException("Not a timeseries Index! Field [{}] not found!", TIMESERIES_FIELDNAME); + } + }).reversed(); private final String name; private final TimestampField timeStampField; diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index e125facb76059..de7dc102939ce 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -665,6 +665,7 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) { private volatile long mappingTotalFieldsLimit; private volatile long mappingDepthLimit; private volatile long mappingFieldNameLengthLimit; + private volatile boolean searchSegmentOrderReversed; /** * The maximum number of refresh listeners allows on this shard. @@ -905,6 +906,10 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti scopedSettings.addSettingsUpdateConsumer(DEFAULT_SEARCH_PIPELINE, this::setDefaultSearchPipeline); } + private void setSearchSegmentOrderReversed(boolean reversed) { + this.searchSegmentOrderReversed = reversed; + } + private void setSearchIdleAfter(TimeValue searchIdleAfter) { this.searchIdleAfter = searchIdleAfter; } @@ -1084,6 +1089,13 @@ public Settings getNodeSettings() { return nodeSettings; } + /** + * Returns true if index level setting for leaf reverse order search optimization is enabled + */ + public boolean getSearchSegmentOrderReversed() { + return this.searchSegmentOrderReversed; + } + /** * Updates the settings and index metadata and notifies all registered settings consumers with the new settings iff at least one * setting has changed. diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index fe003405fd3f8..338a541af387a 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.search.QueryCache; import org.apache.lucene.search.QueryCachingPolicy; @@ -59,6 +60,7 @@ import org.opensearch.indices.breaker.CircuitBreakerService; import org.opensearch.threadpool.ThreadPool; +import java.util.Comparator; import java.util.List; import java.util.Objects; import java.util.function.BooleanSupplier; @@ -102,6 +104,7 @@ public final class EngineConfig { private final Supplier retentionLeasesSupplier; private final boolean isReadOnlyReplica; private final BooleanSupplier primaryModeSupplier; + private final Comparator leafSorter; /** * A supplier of the outstanding retention leases. This is used during merged operations to determine which operations that have been @@ -204,6 +207,7 @@ private EngineConfig(Builder builder) { this.isReadOnlyReplica = builder.isReadOnlyReplica; this.primaryModeSupplier = builder.primaryModeSupplier; this.translogFactory = builder.translogFactory; + this.leafSorter = builder.leafSorter; } /** @@ -451,6 +455,15 @@ public TranslogDeletionPolicyFactory getCustomTranslogDeletionPolicyFactory() { return translogDeletionPolicyFactory; } + /** + * Returns subReaderSorter for org.apache.lucene.index.BaseCompositeReader. + * This gets used in lucene IndexReader and decides order of segment read. + * @return comparator + */ + public Comparator getLeafSorter() { + return this.leafSorter; + } + /** * Builder for EngineConfig class * @@ -483,6 +496,7 @@ public static class Builder { private boolean isReadOnlyReplica; private BooleanSupplier primaryModeSupplier; private TranslogFactory translogFactory = new InternalTranslogFactory(); + Comparator leafSorter; public Builder shardId(ShardId shardId) { this.shardId = shardId; @@ -614,6 +628,11 @@ public Builder translogFactory(TranslogFactory translogFactory) { return this; } + public Builder leafSorter(Comparator leafSorter) { + this.leafSorter = leafSorter; + return this; + } + public EngineConfig build() { return new EngineConfig(this); } diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java b/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java index f5a5d50e11220..76b13ee244a2c 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfigFactory.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.search.QueryCache; import org.apache.lucene.search.QueryCachingPolicy; @@ -36,6 +37,7 @@ import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.Optional; import java.util.function.BooleanSupplier; @@ -151,7 +153,8 @@ public EngineConfig newEngineConfig( EngineConfig.TombstoneDocSupplier tombstoneDocSupplier, boolean isReadOnlyReplica, BooleanSupplier primaryModeSupplier, - TranslogFactory translogFactory + TranslogFactory translogFactory, + Comparator leafSorter ) { CodecService codecServiceToUse = codecService; if (codecService == null && this.codecServiceFactory != null) { @@ -184,6 +187,7 @@ public EngineConfig newEngineConfig( .readOnlyReplica(isReadOnlyReplica) .primaryModeSupplier(primaryModeSupplier) .translogFactory(translogFactory) + .leafSorter(leafSorter) .build(); } diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index 63afc6585a99d..b96bade177be2 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -2322,6 +2322,9 @@ private IndexWriterConfig getIndexWriterConfig() { if (config().getIndexSort() != null) { iwc.setIndexSort(config().getIndexSort()); } + if (config().getLeafSorter() != null) { + iwc.setLeafSorter(config().getLeafSorter()); // The default segment search order + } return iwc; } diff --git a/server/src/main/java/org/opensearch/index/mapper/MappingLookup.java b/server/src/main/java/org/opensearch/index/mapper/MappingLookup.java index 5bccb4f6e827e..024f4b71584bf 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappingLookup.java @@ -33,6 +33,7 @@ package org.opensearch.index.mapper; import org.apache.lucene.analysis.Analyzer; +import org.opensearch.cluster.metadata.DataStream; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.FieldNameAnalyzer; @@ -261,6 +262,15 @@ public String getNestedScope(String path) { return null; } + /** + * If this index contains @timestamp field with Date type, it will return true + * @return true or false based on above condition + */ + public boolean containsTimeStampField() { + MappedFieldType timeSeriesFieldType = this.fieldTypeLookup.get(DataStream.TIMESERIES_FIELDNAME); + return timeSeriesFieldType != null && timeSeriesFieldType instanceof DateFieldMapper.DateFieldType; // has to be Date field type + } + private static String parentObject(String field) { int lastDot = field.lastIndexOf('.'); if (lastDot == -1) { diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index fc236d7d97a04..c0e7a8278e821 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -58,6 +58,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ThreadInterruptedException; import org.opensearch.common.lucene.store.ByteArrayIndexInput; +import org.opensearch.cluster.metadata.DataStream; import org.opensearch.core.Assertions; import org.opensearch.ExceptionsHelper; import org.opensearch.OpenSearchException; @@ -333,6 +334,7 @@ Runnable getGlobalCheckpointSyncer() { private volatile boolean useRetentionLeasesInPeerRecovery; private final Store remoteStore; private final BiFunction translogFactorySupplier; + private final boolean isTimeSeriesIndex; private final RemoteRefreshSegmentPressureService remoteRefreshSegmentPressureService; public IndexShard( @@ -451,6 +453,9 @@ public boolean shouldCache(Query query) { this.checkpointPublisher = checkpointPublisher; this.remoteStore = remoteStore; this.translogFactorySupplier = translogFactorySupplier; + this.isTimeSeriesIndex = (mapperService == null || mapperService.documentMapper() == null) + ? false + : mapperService.documentMapper().mappers().containsTimeStampField(); this.remoteRefreshSegmentPressureService = remoteRefreshSegmentPressureService; } @@ -3627,7 +3632,8 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) thro tombstoneDocSupplier(), isReadOnlyReplica, replicationTracker::isPrimaryMode, - translogFactorySupplier.apply(indexSettings, shardRouting) + translogFactorySupplier.apply(indexSettings, shardRouting), + isTimeSeriesIndex ? DataStream.TIMESERIES_LEAF_SORTER : null // DESC @timestamp default order for timeseries ); } @@ -4670,4 +4676,12 @@ RetentionLeaseSyncer getRetentionLeaseSyncer() { public GatedCloseable getSegmentInfosSnapshot() { return getEngine().getSegmentInfosSnapshot(); } + + /** + * If index is time series (if it contains @timestamp field) + * @return true or false based on above condition + */ + public boolean isTimeSeriesIndex() { + return this.isTimeSeriesIndex; + } } diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java index cf21712fc912a..1a84aba5fff7d 100644 --- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java @@ -75,6 +75,7 @@ import org.opensearch.search.query.QuerySearchResult; import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.MinAndMax; +import org.opensearch.search.sort.SortOrder; import java.io.IOException; import java.util.ArrayList; @@ -282,8 +283,17 @@ public void search( @Override protected void search(List leaves, Weight weight, Collector collector) throws IOException { - for (LeafReaderContext ctx : leaves) { // search each subreader - searchLeaf(ctx, weight, collector); + if (shouldReverseLeafReaderContexts()) { + // reverse the segment search order if this flag is true. + // Certain queries can benefit if we reverse the segment read order, + // for example time series based queries if searched for desc sort order. + for (int i = leaves.size() - 1; i >= 0; i--) { + searchLeaf(leaves.get(i), weight, collector); + } + } else { + for (int i = 0; i < leaves.size(); i++) { + searchLeaf(leaves.get(i), weight, collector); + } } } @@ -496,4 +506,22 @@ private boolean canMatchSearchAfter(LeafReaderContext ctx) throws IOException { } return true; } + + private boolean shouldReverseLeafReaderContexts() { + // Time series based workload by default traverses segments in desc order i.e. latest to the oldest order. + // This is actually beneficial for search queries to start search on latest segments first for time series workload. + // That can slow down ASC order queries on timestamp workload. So to avoid that slowdown, we will reverse leaf + // reader order here. + if (searchContext != null && searchContext.indexShard().isTimeSeriesIndex()) { + // Only reverse order for asc order sort queries + if (searchContext.request() != null + && searchContext.request().source() != null + && searchContext.request().source().sorts() != null + && searchContext.request().source().sorts().size() > 0 + && searchContext.request().source().sorts().get(0).order() == SortOrder.ASC) { + return true; + } + } + return false; + } } diff --git a/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java b/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java index 2db3cd24da80d..f8bedc76ea994 100644 --- a/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java +++ b/server/src/test/java/org/opensearch/index/engine/EngineConfigFactoryTests.java @@ -69,7 +69,8 @@ public void testCreateEngineConfigFromFactory() { null, false, () -> Boolean.TRUE, - new InternalTranslogFactory() + new InternalTranslogFactory(), + null ); assertNotNull(config.getCodec()); @@ -148,7 +149,8 @@ public void testCreateCodecServiceFromFactory() { null, false, () -> Boolean.TRUE, - new InternalTranslogFactory() + new InternalTranslogFactory(), + null ); assertNotNull(config.getCodec()); } diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index 06f74a55be13d..4474aae1f0631 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -761,6 +761,7 @@ public Settings indexSettings() { ).getStringRep() ); } + return builder.build(); } From d01c3c152c0b42fb351741efa6f06e70f8e27b9b Mon Sep 17 00:00:00 2001 From: gashutos Date: Thu, 8 Jun 2023 20:21:50 +0530 Subject: [PATCH 2/4] Enable time series optimization only if it is not IndexSorted index, also ASC order reverse should only consider in @timestamp field Signed-off-by: gashutos --- .../opensearch/index/shard/IndexShard.java | 19 ++++++++++--------- .../search/internal/ContextIndexSearcher.java | 16 +++++++++------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index c0e7a8278e821..ed38c561c7e29 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -3633,7 +3633,8 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) thro isReadOnlyReplica, replicationTracker::isPrimaryMode, translogFactorySupplier.apply(indexSettings, shardRouting), - isTimeSeriesIndex ? DataStream.TIMESERIES_LEAF_SORTER : null // DESC @timestamp default order for timeseries + isTimeSeriesDescSortOptimizationEnabled() ? DataStream.TIMESERIES_LEAF_SORTER : null // DESC @timestamp default order for + // timeseries ); } @@ -3645,6 +3646,14 @@ public boolean isRemoteTranslogEnabled() { return indexSettings() != null && indexSettings().isRemoteTranslogStoreEnabled(); } + /** + * @return true if segment reverse search optimization is enabled for time series based workload. + */ + public boolean isTimeSeriesDescSortOptimizationEnabled() { + // Do not change segment order in case of index sort. + return isTimeSeriesIndex && getIndexSort() == null; + } + /** * @return True if settings indicate this shard is backed by a remote snapshot, false otherwise. */ @@ -4676,12 +4685,4 @@ RetentionLeaseSyncer getRetentionLeaseSyncer() { public GatedCloseable getSegmentInfosSnapshot() { return getEngine().getSegmentInfosSnapshot(); } - - /** - * If index is time series (if it contains @timestamp field) - * @return true or false based on above condition - */ - public boolean isTimeSeriesIndex() { - return this.isTimeSeriesIndex; - } } diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java index 1a84aba5fff7d..f02d28c2c375b 100644 --- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java @@ -62,6 +62,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.CombinedBitSet; import org.apache.lucene.util.SparseFixedBitSet; +import org.opensearch.cluster.metadata.DataStream; import org.opensearch.common.lucene.search.TopDocsAndMaxScore; import org.opensearch.core.common.lease.Releasable; import org.opensearch.search.DocValueFormat; @@ -75,7 +76,6 @@ import org.opensearch.search.query.QuerySearchResult; import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.MinAndMax; -import org.opensearch.search.sort.SortOrder; import java.io.IOException; import java.util.ArrayList; @@ -512,13 +512,15 @@ private boolean shouldReverseLeafReaderContexts() { // This is actually beneficial for search queries to start search on latest segments first for time series workload. // That can slow down ASC order queries on timestamp workload. So to avoid that slowdown, we will reverse leaf // reader order here. - if (searchContext != null && searchContext.indexShard().isTimeSeriesIndex()) { + if (searchContext != null && searchContext.indexShard().isTimeSeriesDescSortOptimizationEnabled()) { // Only reverse order for asc order sort queries - if (searchContext.request() != null - && searchContext.request().source() != null - && searchContext.request().source().sorts() != null - && searchContext.request().source().sorts().size() > 0 - && searchContext.request().source().sorts().get(0).order() == SortOrder.ASC) { + if (searchContext.sort() != null + && searchContext.sort().sort != null + && searchContext.sort().sort.getSort() != null + && searchContext.sort().sort.getSort().length > 0 + && searchContext.sort().sort.getSort()[0].getReverse() == false + && searchContext.sort().sort.getSort()[0].getField() != null + && searchContext.sort().sort.getSort()[0].getField().equals(DataStream.TIMESERIES_FIELDNAME)) { return true; } } From 8d8f924e3e83c207405973754871d465fab813a7 Mon Sep 17 00:00:00 2001 From: gashutos Date: Thu, 8 Jun 2023 20:41:29 +0530 Subject: [PATCH 3/4] Modifying CHANGELOG Signed-off-by: gashutos --- CHANGELOG.md | 2 ++ release-notes/opensearch.release-notes-2.8.0.md | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0473a086d91eb..5eb06e91568fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Added - Add TokenManager Interface ([#7452](https://github.com/opensearch-project/OpenSearch/pull/7452)) - Add Remote store as a segment replication source ([#7653](https://github.com/opensearch-project/OpenSearch/pull/7653)) +- Add descending order search optimization through reverse segment read. ([#7967](https://github.com/opensearch-project/OpenSearch/pull/7967)) + ### Dependencies - Bump `jackson` from 2.15.1 to 2.15.2 ([#7897](https://github.com/opensearch-project/OpenSearch/pull/7897)) diff --git a/release-notes/opensearch.release-notes-2.8.0.md b/release-notes/opensearch.release-notes-2.8.0.md index 4f7dc38997e1e..556e1f0047595 100644 --- a/release-notes/opensearch.release-notes-2.8.0.md +++ b/release-notes/opensearch.release-notes-2.8.0.md @@ -10,7 +10,6 @@ - [Search Pipelines] Add RenameFieldResponseProcessor for Search Pipelines ([#7377](https://github.com/opensearch-project/OpenSearch/pull/7377)) - [Search Pipelines] Split search pipeline processor factories by type ([#7597](https://github.com/opensearch-project/OpenSearch/pull/7597)) - [Search Pipelines] Add script processor ([#7607](https://github.com/opensearch-project/OpenSearch/pull/7607)) -- Add descending order search optimization through reverse segment read. ([#7244](https://github.com/opensearch-project/OpenSearch/pull/7244)) - Add 'unsigned_long' numeric field type ([#6237](https://github.com/opensearch-project/OpenSearch/pull/6237)) - Add back primary shard preference for queries ([#7375](https://github.com/opensearch-project/OpenSearch/pull/7375)) - Add task cancellation timestamp in task API ([#7455](https://github.com/opensearch-project/OpenSearch/pull/7455)) From 106f43a015ef375d0c85e043218e191bbaffe6ee Mon Sep 17 00:00:00 2001 From: gashutos Date: Mon, 12 Jun 2023 15:22:39 +0530 Subject: [PATCH 4/4] Adding integ test for scroll API where sort by _doc is getting early termination Signed-off-by: gashutos --- .../test/scroll/10_basic_timeseries.yml | 161 ++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/scroll/10_basic_timeseries.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/scroll/10_basic_timeseries.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/scroll/10_basic_timeseries.yml new file mode 100644 index 0000000000000..1995bee89a0a4 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/scroll/10_basic_timeseries.yml @@ -0,0 +1,161 @@ +--- +"Basic scroll on time series workload for reversed leaf sorter": + - do: + indices.create: + index: test_scroll_time_series + body: + mappings: + properties: + name: + type: keyword + '@timestamp': + type: date + + - do: + bulk: + refresh: true + index: test_scroll_time_series + body: + - '{"index": {}}' + - '{"name": "1", "@timestamp": "2010-03-12T01:07:00"}' + - '{"index": {}}' + - '{"name": "2", "@timestamp": "2010-03-12T01:07:01"}' + - '{"index": {}}' + - '{"name": "3", "@timestamp": "2010-03-12T01:07:02"}' + - '{"index": {}}' + - '{"name": "4", "@timestamp": "2010-03-12T01:07:03"}' + - '{"index": {}}' + - '{"name": "5", "@timestamp": "2010-03-12T01:07:04"}' + - '{"index": {}}' + - '{"name": "6", "@timestamp": "2010-03-12T01:07:05"}' + - '{"index": {}}' + - '{"name": "7", "@timestamp": "2010-03-12T01:07:06"}' + - '{"index": {}}' + - '{"name": "8", "@timestamp": "2010-03-12T01:07:07"}' + - '{"index": {}}' + - '{"name": "9", "@timestamp": "2010-03-12T01:07:08"}' + - '{"index": {}}' + - '{"name": "10", "@timestamp": "2010-03-12T01:07:09"}' + - do: + indices.refresh: {} + - do: + bulk: + refresh: true + index: test_scroll_time_series + body: + - '{"index": {}}' + - '{"name": "11", "@timestamp": "2010-03-12T01:07:10"}' + - '{"index": {}}' + - '{"name": "12", "@timestamp": "2010-03-12T01:07:11"}' + - '{"index": {}}' + - '{"name": "13", "@timestamp": "2010-03-12T01:07:12"}' + - '{"index": {}}' + - '{"name": "14", "@timestamp": "2010-03-12T01:07:13"}' + - '{"index": {}}' + - '{"name": "15", "@timestamp": "2010-03-12T01:07:14"}' + - '{"index": {}}' + - '{"name": "16", "@timestamp": "2010-03-12T01:07:15"}' + - '{"index": {}}' + - '{"name": "17", "@timestamp": "2010-03-12T01:07:16"}' + - '{"index": {}}' + - '{"name": "18", "@timestamp": "2010-03-12T01:07:17"}' + - '{"index": {}}' + - '{"name": "19", "@timestamp": "2010-03-12T01:07:18"}' + - '{"index": {}}' + - '{"name": "20", "@timestamp": "2010-03-12T01:07:19"}' + - do: + indices.refresh: { } + - do: + bulk: + refresh: true + index: test_scroll_time_series + body: + - '{"index": {}}' + - '{"name": "21", "@timestamp": "2010-03-12T01:07:20"}' + - '{"index": {}}' + - '{"name": "22", "@timestamp": "2010-03-12T01:07:21"}' + - '{"index": {}}' + - '{"name": "23", "@timestamp": "2010-03-12T01:07:22"}' + - '{"index": {}}' + - '{"name": "24", "@timestamp": "2010-03-12T01:07:23"}' + - '{"index": {}}' + - '{"name": "25", "@timestamp": "2010-03-12T01:07:24"}' + - '{"index": {}}' + - '{"name": "26", "@timestamp": "2010-03-12T01:07:25"}' + - '{"index": {}}' + - '{"name": "27", "@timestamp": "2010-03-12T01:07:26"}' + - '{"index": {}}' + - '{"name": "28", "@timestamp": "2010-03-12T01:07:27"}' + - '{"index": {}}' + - '{"name": "29", "@timestamp": "2010-03-12T01:07:28"}' + - '{"index": {}}' + - '{"name": "30", "@timestamp": "2010-03-12T01:07:29"}' + - do: + indices.refresh: { } + + - do: + search: + rest_total_hits_as_int: true + index: test_scroll_time_series + size: 5 + scroll: 1m + sort: _doc + body: + query: + match_all: {} + + - set: {_scroll_id: scroll_id} + - match: {hits.total: 30 } + - length: {hits.hits: 5 } + + - do: + scroll: + rest_total_hits_as_int: true + body: { "scroll_id": "$scroll_id", "scroll": "1m"} + + - match: {hits.total: 30 } + - length: {hits.hits: 5 } + + - do: + scroll: + rest_total_hits_as_int: true + body: { "scroll_id": "$scroll_id", "scroll": "1m" } + + - match: { hits.total: 30 } + - length: { hits.hits: 5 } + + - do: + scroll: + rest_total_hits_as_int: true + body: { "scroll_id": "$scroll_id", "scroll": "1m" } + + - match: { hits.total: 30 } + - length: { hits.hits: 5 } + + - do: + scroll: + rest_total_hits_as_int: true + body: { "scroll_id": "$scroll_id", "scroll": "1m" } + + - match: { hits.total: 30 } + - length: { hits.hits: 5 } + + - do: + scroll: + rest_total_hits_as_int: true + body: { "scroll_id": "$scroll_id", "scroll": "1m" } + + - match: { hits.total: 30 } + - length: { hits.hits: 5 } + + - do: + scroll: + rest_total_hits_as_int: true + body: { "scroll_id": "$scroll_id", "scroll": "1m" } + + - match: { hits.total: 30 } + - length: { hits.hits: 0 } + + - do: + clear_scroll: + scroll_id: $scroll_id