From 9238f17b3f0dcdc6fa5e7eded123d27d94f12241 Mon Sep 17 00:00:00 2001 From: Bharathwaj G Date: Sat, 12 Oct 2024 09:46:14 +0530 Subject: [PATCH] adding tests and fixes Signed-off-by: Bharathwaj G --- .../Composite912DocValuesReader.java | 1 + .../startree/builder/BaseStarTreeBuilder.java | 80 +-- .../builder/OffHeapStarTreeBuilder.java | 11 +- .../builder/OnHeapStarTreeBuilder.java | 13 +- .../AbstractStarTreeDVFormatTests.java | 125 +++++ .../StarTreeDocValuesFormatTests.java | 101 +--- .../StarTreeKeywordDocValuesFormatTests.java | 486 ++++++++++++++++++ .../startree/builder/BuilderTestsUtils.java | 59 ++- .../StarTreeBuilderFlushFlowTests.java | 125 +++++ 9 files changed, 859 insertions(+), 142 deletions(-) create mode 100644 server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/AbstractStarTreeDVFormatTests.java create mode 100644 server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeKeywordDocValuesFormatTests.java diff --git a/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java index e4ee2f2950e45..e5695f9c350ca 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java @@ -165,6 +165,7 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState dimensionEntry.getKey() ); fields.add(dimName); + dimensionFieldTypeMap.put(dimName, dimensionEntry.getValue()); } // adding metric fields for (Metric metric : starTreeMetadata.getMetrics()) { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index b61016414e2ee..df6278d2853c7 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -59,6 +59,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -322,48 +323,64 @@ private SequentialDocValuesIterator getSequentialDocValuesIterator( } /** - * Returns the ordinal map based on given star-tree values across different segments + * Returns the ordinal map per field based on given star-tree values across different segments */ - protected OrdinalMap getOrdinalMap(List starTreeValuesSubs, MergeState mergeState) throws IOException { + protected Map getOrdinalMaps(List starTreeValuesSubs, MergeState mergeState) throws IOException { long curr = System.currentTimeMillis(); - List toMerge = new ArrayList<>(); + Map> dimensionToIterators = new HashMap<>(); + // Group iterators by dimension for (StarTreeValues starTree : starTreeValuesSubs) { for (String dimName : starTree.getStarTreeField().getDimensionNames()) { if (starTree.getDimensionValuesIterator(dimName) instanceof SortedSetStarTreeValuesIterator) { - toMerge.add((SortedSetStarTreeValuesIterator) starTree.getDimensionValuesIterator(dimName)); + dimensionToIterators.computeIfAbsent(dimName, k -> new ArrayList<>()) + .add((SortedSetStarTreeValuesIterator) starTree.getDimensionValuesIterator(dimName)); } } } - if (toMerge.isEmpty()) return null; - // step 1: iterate through each sub and mark terms still in use - TermsEnum[] liveTerms = new TermsEnum[toMerge.size()]; - long[] weights = new long[liveTerms.length]; - for (int sub = 0; sub < liveTerms.length; sub++) { - SortedSetStarTreeValuesIterator dv = toMerge.get(sub); - Bits liveDocs = mergeState.liveDocs[sub]; - if (liveDocs == null) { - liveTerms[sub] = dv.termsEnum(); - weights[sub] = dv.getValueCount(); - } else { - LongBitSet bitset = new LongBitSet(dv.getValueCount()); - int docID; - while ((docID = dv.nextEntry()) != NO_MORE_ENTRIES) { - if (liveDocs.get(docID)) { - for (int i = 0; i < dv.docValueCount(); i++) { - bitset.set(dv.nextOrd()); + + if (dimensionToIterators.isEmpty()) return Collections.emptyMap(); + + Map dimensionToOrdinalMap = new HashMap<>(); + + for (Map.Entry> entry : dimensionToIterators.entrySet()) { + String dimName = entry.getKey(); + List iterators = entry.getValue(); + + // step 1: iterate through each sub and mark terms still in use + TermsEnum[] liveTerms = new TermsEnum[iterators.size()]; + long[] weights = new long[liveTerms.length]; + + for (int sub = 0; sub < liveTerms.length; sub++) { + SortedSetStarTreeValuesIterator dv = iterators.get(sub); + Bits liveDocs = mergeState.liveDocs[sub]; + if (liveDocs == null) { + liveTerms[sub] = dv.termsEnum(); + weights[sub] = dv.getValueCount(); + } else { + LongBitSet bitset = new LongBitSet(dv.getValueCount()); + int docID; + while ((docID = dv.nextEntry()) != NO_MORE_ENTRIES) { + if (liveDocs.get(docID)) { + for (int i = 0; i < dv.docValueCount(); i++) { + bitset.set(dv.nextOrd()); + } } } + liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset); + weights[sub] = bitset.cardinality(); } - liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset); - weights[sub] = bitset.cardinality(); } + + // step 2: create ordinal map for this dimension + OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT); + dimensionToOrdinalMap.put(dimName, map); + + logger.debug("Ordinal map for dimension {} - Size in bytes: {}", dimName, map.ramBytesUsed()); } - // step 2: create ordinal map (this conceptually does the "merging") - final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT); - logger.debug("Map size in bytes : {}", map.ramBytesUsed()); - logger.debug("Ordinal map takes : {} ", System.currentTimeMillis() - curr); - return map; + + logger.debug("Total time to build ordinal maps: {} ms", System.currentTimeMillis() - curr); + return dimensionToOrdinalMap; } /** @@ -579,13 +596,13 @@ protected StarTreeDocument getStarTreeDocument( int currentDocId, SequentialDocValuesIterator[] dimensionReaders, List metricReaders, - LongValues longValues + Map longValues ) throws IOException { Long[] dims = new Long[numDimensions]; int i = 0; for (SequentialDocValuesIterator dimensionValueIterator : dimensionReaders) { dimensionValueIterator.nextEntry(currentDocId); - Long val = dimensionValueIterator.value(currentDocId, longValues); + Long val = dimensionValueIterator.value(currentDocId, longValues.get(starTreeField.getDimensionNames().get(i))); dims[i] = val; i++; } @@ -838,7 +855,7 @@ private static Long getLong(Object metric) { * Sets the sortedSetDocValuesMap. * This is needed as we need to write the ordinals and also the bytesRef associated with it */ - protected void setSortedSetDocValuesMap(Map sortedSetDocValuesMap) { + void setSortedSetDocValuesMap(Map sortedSetDocValuesMap) { this.sortedSetDocValuesMap = sortedSetDocValuesMap; } @@ -908,7 +925,6 @@ private SequentialDocValuesIterator getIteratorForNumericField( * @throws IOException throws an exception if we are unable to add the doc */ private void appendToStarTree(StarTreeDocument starTreeDocument) throws IOException { - // System.out.println(starTreeDocument); appendStarTreeDocument(starTreeDocument); numStarTreeDocs++; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java index f4eee73fca7c4..7daa7742937c9 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java @@ -32,6 +32,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -146,7 +147,7 @@ public Iterator sortAndAggregateSegmentDocuments( Iterator mergeStarTrees(List starTreeValuesSubs, MergeState mergeState) throws IOException { int numDocs = 0; int[] docIds; - OrdinalMap ordinalMap = getOrdinalMap(starTreeValuesSubs, mergeState); + Map ordinalMaps = getOrdinalMaps(starTreeValuesSubs, mergeState); try { int seg = 0; for (StarTreeValues starTreeValues : starTreeValuesSubs) { @@ -155,12 +156,12 @@ Iterator mergeStarTrees(List starTreeValuesSub AtomicInteger numSegmentDocs = new AtomicInteger(); setReadersAndNumSegmentDocs(dimensionReaders, metricReaders, numSegmentDocs, starTreeValues); int currentDocId = 0; - LongValues longValues = null; - if (ordinalMap != null) { - longValues = ordinalMap.getGlobalOrds(seg); + Map longValuesMap = new LinkedHashMap<>(); + for (Map.Entry entry : ordinalMaps.entrySet()) { + longValuesMap.put(entry.getKey(), entry.getValue().getGlobalOrds(seg)); } while (currentDocId < numSegmentDocs.get()) { - StarTreeDocument starTreeDocument = getStarTreeDocument(currentDocId, dimensionReaders, metricReaders, longValues); + StarTreeDocument starTreeDocument = getStarTreeDocument(currentDocId, dimensionReaders, metricReaders, longValuesMap); segmentDocumentFileManager.writeStarTreeDocument(starTreeDocument, true); numDocs++; currentDocId++; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java index a6d987974a15b..9a06f8a705ae5 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java @@ -24,8 +24,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -134,21 +134,20 @@ Iterator mergeStarTrees(List starTreeValuesSub */ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeValuesSubs, MergeState mergeState) throws IOException { List starTreeDocuments = new ArrayList<>(); - OrdinalMap ordinalMap = getOrdinalMap(starTreeValuesSubs, mergeState); + Map ordinalMaps = getOrdinalMaps(starTreeValuesSubs, mergeState); int seg = 0; for (StarTreeValues starTreeValues : starTreeValuesSubs) { - Map segToGlobalOrdMap = new HashMap<>(); SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[numDimensions]; List metricReaders = new ArrayList<>(); AtomicInteger numSegmentDocs = new AtomicInteger(); setReadersAndNumSegmentDocs(dimensionReaders, metricReaders, numSegmentDocs, starTreeValues); int currentDocId = 0; - LongValues longValues = null; - if (ordinalMap != null) { - longValues = ordinalMap.getGlobalOrds(seg); + Map longValuesMap = new LinkedHashMap<>(); + for (Map.Entry entry : ordinalMaps.entrySet()) { + longValuesMap.put(entry.getKey(), entry.getValue().getGlobalOrds(seg)); } while (currentDocId < numSegmentDocs.get()) { - starTreeDocuments.add(getStarTreeDocument(currentDocId, dimensionReaders, metricReaders, longValues)); + starTreeDocuments.add(getStarTreeDocument(currentDocId, dimensionReaders, metricReaders, longValuesMap)); currentDocId++; } seg++; diff --git a/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/AbstractStarTreeDVFormatTests.java b/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/AbstractStarTreeDVFormatTests.java new file mode 100644 index 0000000000000..f688dcf93e874 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/AbstractStarTreeDVFormatTests.java @@ -0,0 +1,125 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite912.datacube.startree; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene912.Lucene912Codec; +import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.opensearch.Version; +import org.opensearch.cluster.ClusterModule; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.CheckedConsumer; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.MapperTestUtils; +import org.opensearch.index.codec.composite.composite912.Composite912Codec; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.indices.IndicesModule; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX; + +/** + * Abstract star tree doc values Lucene tests + */ +@LuceneTestCase.SuppressSysoutChecks(bugUrl = "we log a lot on purpose") +public abstract class AbstractStarTreeDVFormatTests extends BaseDocValuesFormatTestCase { + MapperService mapperService = null; + StarTreeFieldConfiguration.StarTreeBuildMode buildMode; + + public AbstractStarTreeDVFormatTests(StarTreeFieldConfiguration.StarTreeBuildMode buildMode) { + this.buildMode = buildMode; + } + + @ParametersFactory + public static Collection parameters() { + List parameters = new ArrayList<>(); + parameters.add(new Object[] { StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP }); + parameters.add(new Object[] { StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP }); + return parameters; + } + + @BeforeClass + public static void createMapper() throws Exception { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(STAR_TREE_INDEX, "true").build()); + } + + @AfterClass + public static void clearMapper() { + FeatureFlags.initializeFeatureFlags(Settings.EMPTY); + } + + @After + public void teardown() throws IOException { + mapperService.close(); + } + + @Override + protected Codec getCodec() { + final Logger testLogger = LogManager.getLogger(StarTreeDocValuesFormatTests.class); + + try { + createMapperService(getExpandedMapping()); + } catch (IOException e) { + throw new RuntimeException(e); + } + Codec codec = new Composite912Codec(Lucene912Codec.Mode.BEST_SPEED, mapperService, testLogger); + return codec; + } + + private void createMapperService(XContentBuilder builder) throws IOException { + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB)) + .build(); + IndexMetadata indexMetadata = IndexMetadata.builder("test").settings(settings).putMapping(builder.toString()).build(); + IndicesModule indicesModule = new IndicesModule(Collections.emptyList()); + mapperService = MapperTestUtils.newMapperServiceWithHelperAnalyzer( + new NamedXContentRegistry(ClusterModule.getNamedXWriteables()), + createTempDir(), + settings, + indicesModule, + "test" + ); + mapperService.merge(indexMetadata, MapperService.MergeReason.INDEX_TEMPLATE); + } + + abstract XContentBuilder getExpandedMapping() throws IOException; + + XContentBuilder topMapping(CheckedConsumer buildFields) throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject("_doc"); + buildFields.accept(builder); + return builder.endObject().endObject(); + } + +} diff --git a/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeDocValuesFormatTests.java b/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeDocValuesFormatTests.java index 158c9dad583bc..884d5a3301848 100644 --- a/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeDocValuesFormatTests.java +++ b/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeDocValuesFormatTests.java @@ -8,12 +8,6 @@ package org.opensearch.index.codec.composite912.datacube.startree; -import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene912.Lucene912Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; @@ -21,94 +15,32 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SegmentReader; import org.apache.lucene.store.Directory; -import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; -import org.opensearch.Version; -import org.opensearch.cluster.ClusterModule; -import org.opensearch.cluster.metadata.IndexMetadata; -import org.opensearch.common.CheckedConsumer; import org.opensearch.common.lucene.Lucene; -import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; -import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.core.common.unit.ByteSizeUnit; -import org.opensearch.core.common.unit.ByteSizeValue; -import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentBuilder; -import org.opensearch.index.IndexSettings; -import org.opensearch.index.MapperTestUtils; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.codec.composite.CompositeIndexReader; -import org.opensearch.index.codec.composite.composite912.Composite912Codec; import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; -import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; import org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils; import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; -import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.NumberFieldMapper; -import org.opensearch.indices.IndicesModule; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.BeforeClass; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; import java.util.List; -import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX; import static org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils.assertStarTreeDocuments; /** * Star tree doc values Lucene tests */ @LuceneTestCase.SuppressSysoutChecks(bugUrl = "we log a lot on purpose") -public class StarTreeDocValuesFormatTests extends BaseDocValuesFormatTestCase { - MapperService mapperService = null; - StarTreeFieldConfiguration.StarTreeBuildMode buildMode; +public class StarTreeDocValuesFormatTests extends AbstractStarTreeDVFormatTests { public StarTreeDocValuesFormatTests(StarTreeFieldConfiguration.StarTreeBuildMode buildMode) { - this.buildMode = buildMode; - } - - @ParametersFactory - public static Collection parameters() { - List parameters = new ArrayList<>(); - parameters.add(new Object[] { StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP }); - parameters.add(new Object[] { StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP }); - return parameters; - } - - @BeforeClass - public static void createMapper() throws Exception { - FeatureFlags.initializeFeatureFlags(Settings.builder().put(STAR_TREE_INDEX, "true").build()); - } - - @AfterClass - public static void clearMapper() { - FeatureFlags.initializeFeatureFlags(Settings.EMPTY); - } - - @After - public void teardown() throws IOException { - mapperService.close(); - } - - @Override - protected Codec getCodec() { - final Logger testLogger = LogManager.getLogger(StarTreeDocValuesFormatTests.class); - - try { - createMapperService(getExpandedMapping()); - } catch (IOException e) { - throw new RuntimeException(e); - } - Codec codec = new Composite912Codec(Lucene912Codec.Mode.BEST_SPEED, mapperService, testLogger); - return codec; + super(buildMode); } public void testStarTreeDocValues() throws IOException { @@ -207,7 +139,8 @@ public void testStarTreeDocValues() throws IOException { directory.close(); } - private XContentBuilder getExpandedMapping() throws IOException { + @Override + protected XContentBuilder getExpandedMapping() throws IOException { return topMapping(b -> { b.startObject("composite"); b.startObject("startree"); @@ -260,30 +193,4 @@ private XContentBuilder getExpandedMapping() throws IOException { b.endObject(); }); } - - private XContentBuilder topMapping(CheckedConsumer buildFields) throws IOException { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject("_doc"); - buildFields.accept(builder); - return builder.endObject().endObject(); - } - - private void createMapperService(XContentBuilder builder) throws IOException { - Settings settings = Settings.builder() - .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) - .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) - .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB)) - .build(); - IndexMetadata indexMetadata = IndexMetadata.builder("test").settings(settings).putMapping(builder.toString()).build(); - IndicesModule indicesModule = new IndicesModule(Collections.emptyList()); - mapperService = MapperTestUtils.newMapperServiceWithHelperAnalyzer( - new NamedXContentRegistry(ClusterModule.getNamedXWriteables()), - createTempDir(), - settings, - indicesModule, - "test" - ); - mapperService.merge(indexMetadata, MapperService.MergeReason.INDEX_TEMPLATE); - } } diff --git a/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeKeywordDocValuesFormatTests.java b/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeKeywordDocValuesFormatTests.java new file mode 100644 index 0000000000000..bf7e7a25ebdbd --- /dev/null +++ b/server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeKeywordDocValuesFormatTests.java @@ -0,0 +1,486 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite912.datacube.startree; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.BytesRef; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.codec.composite.CompositeIndexReader; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.mapper.NumberFieldMapper; + +import java.io.IOException; +import java.util.List; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.STAR_TREE_DOCS_COUNT; +import static org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils.assertStarTreeDocuments; + +/** + * Star tree doc values Lucene tests + */ +@LuceneTestCase.SuppressSysoutChecks(bugUrl = "we log a lot on purpose") +public class StarTreeKeywordDocValuesFormatTests extends AbstractStarTreeDVFormatTests { + + public StarTreeKeywordDocValuesFormatTests(StarTreeFieldConfiguration.StarTreeBuildMode buildMode) { + super(buildMode); + } + + public void testStarTreeKeywordDocValues() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + doc.add(new SortedSetDocValuesField("keyword1", new BytesRef("text1"))); + doc.add(new SortedSetDocValuesField("keyword2", new BytesRef("text2"))); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + doc.add(new SortedSetDocValuesField("keyword1", new BytesRef("text11"))); + doc.add(new SortedSetDocValuesField("keyword2", new BytesRef("text22"))); + iw.addDocument(doc); + iw.forceMerge(1); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedSetDocValuesField("keyword1", new BytesRef("text1"))); + doc.add(new SortedSetDocValuesField("keyword2", new BytesRef("text2"))); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedSetDocValuesField("keyword1", new BytesRef("text11"))); + doc.add(new SortedSetDocValuesField("keyword2", new BytesRef("text22"))); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory)); + TestUtil.checkReader(ir); + assertEquals(1, ir.leaves().size()); + + // Segment documents + /** + * sndv dv field + * [1, 1, -1] + * [1, 1, -1] + * [2, 2, -2] + * [2, 2, -2] + */ + // Star tree documents + /** + SortedSet dv | [ sum, value_count, min, max[sndv]] , doc_count + [0, 0] | [3.0, 2.0, 1.0, 2.0, 2.0] + [1, 1] | [3.0, 2.0, 1.0, 2.0, 2.0] + [null, 0] | [3.0, 2.0, 1.0, 2.0, 2.0] + [null, 1] | [3.0, 2.0, 1.0, 2.0, 2.0] + [null, null] | [6.0, 4.0, 1.0, 2.0, 4.0] + */ + StarTreeDocument[] expectedStarTreeDocuments = new StarTreeDocument[5]; + expectedStarTreeDocuments[0] = new StarTreeDocument(new Long[] { 0L, 0L }, new Double[] { 3.0, 2.0, 1.0, 2.0, 2.0 }); + expectedStarTreeDocuments[1] = new StarTreeDocument(new Long[] { 1L, 1L }, new Double[] { 3.0, 2.0, 1.0, 2.0, 2.0 }); + expectedStarTreeDocuments[2] = new StarTreeDocument(new Long[] { null, 0L }, new Double[] { 3.0, 2.0, 1.0, 2.0, 2.0 }); + expectedStarTreeDocuments[3] = new StarTreeDocument(new Long[] { null, 1L }, new Double[] { 3.0, 2.0, 1.0, 2.0, 2.0 }); + expectedStarTreeDocuments[4] = new StarTreeDocument(new Long[] { null, null }, new Double[] { 6.0, 4.0, 1.0, 2.0, 4.0 }); + + for (LeafReaderContext context : ir.leaves()) { + SegmentReader reader = Lucene.segmentReader(context.reader()); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + + for (CompositeIndexFieldInfo compositeIndexFieldInfo : compositeIndexFields) { + StarTreeValues starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo); + StarTreeDocument[] starTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments( + List.of(starTreeValues), + List.of( + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG + ), + Integer.parseInt(starTreeValues.getAttributes().get(STAR_TREE_DOCS_COUNT)) + ); + assertStarTreeDocuments(starTreeDocuments, expectedStarTreeDocuments); + } + } + ir.close(); + directory.close(); + } + + public void testStarKeywordDocValuesWithMissingDocs() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + doc.add(new SortedSetDocValuesField("keyword2", new BytesRef("text2"))); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + doc.add(new SortedSetDocValuesField("keyword2", new BytesRef("text22"))); + iw.addDocument(doc); + iw.forceMerge(1); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedSetDocValuesField("keyword1", new BytesRef("text1"))); + doc.add(new SortedSetDocValuesField("keyword2", new BytesRef("text2"))); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedSetDocValuesField("keyword1", new BytesRef("text11"))); + doc.add(new SortedSetDocValuesField("keyword2", new BytesRef("text22"))); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory)); + TestUtil.checkReader(ir); + assertEquals(1, ir.leaves().size()); + + // Segment documents + /** + * sndv dv field + * [1, 1, -1] + * [1, 1, -1] + * [2, 2, -2] + * [2, 2, -2] + */ + // Star tree documents + /** + * sndv dv | [ sum, value_count, min, max[sndv]] , doc_count + [0, 0] | [2.0, 1.0, 2.0, 2.0, 1.0] + [1, 1] | [2.0, 1.0, 2.0, 2.0, 1.0] + [null, 0] | [1.0, 1.0, 1.0, 1.0, 1.0] + [null, 1] | [1.0, 1.0, 1.0, 1.0, 1.0] + [null, 0] | [3.0, 2.0, 1.0, 2.0, 2.0] + [null, 1] | [3.0, 2.0, 1.0, 2.0, 2.0] + [null, null] | [6.0, 4.0, 1.0, 2.0, 4.0] + [null, null] | [2.0, 2.0, 1.0, 1.0, 2.0] + */ + StarTreeDocument[] expectedStarTreeDocuments = new StarTreeDocument[8]; + expectedStarTreeDocuments[0] = new StarTreeDocument(new Long[] { 0L, 0L }, new Double[] { 2.0, 1.0, 2.0, 2.0, 1.0 }); + expectedStarTreeDocuments[1] = new StarTreeDocument(new Long[] { 1L, 1L }, new Double[] { 2.0, 1.0, 2.0, 2.0, 1.0 }); + expectedStarTreeDocuments[2] = new StarTreeDocument(new Long[] { null, 0L }, new Double[] { 1.0, 1.0, 1.0, 1.0, 1.0 }); + expectedStarTreeDocuments[3] = new StarTreeDocument(new Long[] { null, 1L }, new Double[] { 1.0, 1.0, 1.0, 1.0, 1.0 }); + expectedStarTreeDocuments[4] = new StarTreeDocument(new Long[] { null, 0L }, new Double[] { 3.0, 2.0, 1.0, 2.0, 2.0 }); + expectedStarTreeDocuments[5] = new StarTreeDocument(new Long[] { null, 1L }, new Double[] { 3.0, 2.0, 1.0, 2.0, 2.0 }); + expectedStarTreeDocuments[6] = new StarTreeDocument(new Long[] { null, null }, new Double[] { 6.0, 4.0, 1.0, 2.0, 4.0 }); + expectedStarTreeDocuments[7] = new StarTreeDocument(new Long[] { null, null }, new Double[] { 2.0, 2.0, 1.0, 1.0, 2.0 }); + + for (LeafReaderContext context : ir.leaves()) { + SegmentReader reader = Lucene.segmentReader(context.reader()); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + + for (CompositeIndexFieldInfo compositeIndexFieldInfo : compositeIndexFields) { + StarTreeValues starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo); + StarTreeDocument[] starTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments( + List.of(starTreeValues), + List.of( + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG + ), + Integer.parseInt(starTreeValues.getAttributes().get(STAR_TREE_DOCS_COUNT)) + ); + assertStarTreeDocuments(starTreeDocuments, expectedStarTreeDocuments); + } + } + ir.close(); + directory.close(); + } + + public void testStarKeywordDocValuesWithMissingDocsInSegment() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + iw.addDocument(doc); + iw.forceMerge(1); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedSetDocValuesField("keyword1", new BytesRef("text1"))); + doc.add(new SortedSetDocValuesField("keyword2", new BytesRef("text2"))); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedSetDocValuesField("keyword1", new BytesRef("text11"))); + doc.add(new SortedSetDocValuesField("keyword2", new BytesRef("text22"))); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory)); + TestUtil.checkReader(ir); + assertEquals(1, ir.leaves().size()); + + // Segment documents + /** + * sndv dv field + * [1, 1, -1] + * [1, 1, -1] + * [2, 2, -2] + * [2, 2, -2] + */ + // Star tree documents + /** + * sndv dv | [ sum, value_count, min, max[sndv]] , doc_count + [0, 0] | [2.0, 1.0, 2.0, 2.0, 1.0] + [1, 1] | [2.0, 1.0, 2.0, 2.0, 1.0] + [null, null] | [2.0, 2.0, 1.0, 1.0, 2.0] // This is for missing doc + [null, 0] | [2.0, 1.0, 2.0, 2.0, 1.0] + [null, 1] | [2.0, 1.0, 2.0, 2.0, 1.0] + [null, null] | [2.0, 2.0, 1.0, 1.0, 2.0] + [null, null] | [6.0, 4.0, 1.0, 2.0, 4.0] // This is star document + */ + StarTreeDocument[] expectedStarTreeDocuments = new StarTreeDocument[7]; + expectedStarTreeDocuments[0] = new StarTreeDocument(new Long[] { 0L, 0L }, new Double[] { 2.0, 1.0, 2.0, 2.0, 1.0 }); + expectedStarTreeDocuments[1] = new StarTreeDocument(new Long[] { 1L, 1L }, new Double[] { 2.0, 1.0, 2.0, 2.0, 1.0 }); + expectedStarTreeDocuments[2] = new StarTreeDocument(new Long[] { null, null }, new Double[] { 2.0, 2.0, 1.0, 1.0, 2.0 }); + expectedStarTreeDocuments[3] = new StarTreeDocument(new Long[] { null, 0L }, new Double[] { 2.0, 1.0, 2.0, 2.0, 1.0 }); + expectedStarTreeDocuments[4] = new StarTreeDocument(new Long[] { null, 1L }, new Double[] { 2.0, 1.0, 2.0, 2.0, 1.0 }); + expectedStarTreeDocuments[5] = new StarTreeDocument(new Long[] { null, null }, new Double[] { 2.0, 2.0, 1.0, 1.0, 2.0 }); + expectedStarTreeDocuments[6] = new StarTreeDocument(new Long[] { null, null }, new Double[] { 6.0, 4.0, 1.0, 2.0, 4.0 }); + + for (LeafReaderContext context : ir.leaves()) { + SegmentReader reader = Lucene.segmentReader(context.reader()); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + + for (CompositeIndexFieldInfo compositeIndexFieldInfo : compositeIndexFields) { + StarTreeValues starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo); + StarTreeDocument[] starTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments( + List.of(starTreeValues), + List.of( + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG + ), + Integer.parseInt(starTreeValues.getAttributes().get(STAR_TREE_DOCS_COUNT)) + ); + assertStarTreeDocuments(starTreeDocuments, expectedStarTreeDocuments); + } + } + ir.close(); + directory.close(); + } + + public void testStarKeywordDocValuesWithMissingDocsInAllSegments() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + iw.addDocument(doc); + iw.forceMerge(1); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory)); + TestUtil.checkReader(ir); + assertEquals(1, ir.leaves().size()); + + // Segment documents + /** + * sndv dv field + * [1, 1, -1] + * [1, 1, -1] + * [2, 2, -2] + * [2, 2, -2] + */ + // Star tree documents + /** + * sndv dv | [ sum, value_count, min, max[sndv]] , doc_count + [null, null] | [6.0, 4.0, 1.0, 2.0, 4.0] + + */ + StarTreeDocument[] expectedStarTreeDocuments = new StarTreeDocument[1]; + expectedStarTreeDocuments[0] = new StarTreeDocument(new Long[] { null, null }, new Double[] { 6.0, 4.0, 1.0, 2.0, 4.0 }); + + for (LeafReaderContext context : ir.leaves()) { + SegmentReader reader = Lucene.segmentReader(context.reader()); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + + for (CompositeIndexFieldInfo compositeIndexFieldInfo : compositeIndexFields) { + StarTreeValues starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo); + StarTreeDocument[] starTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments( + List.of(starTreeValues), + List.of( + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG + ), + Integer.parseInt(starTreeValues.getAttributes().get(STAR_TREE_DOCS_COUNT)) + ); + assertStarTreeDocuments(starTreeDocuments, expectedStarTreeDocuments); + } + } + ir.close(); + directory.close(); + } + + public void testStarKeywordDocValuesWithMissingDocsInMixedSegments() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + iw.addDocument(doc); + iw.forceMerge(1); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedSetDocValuesField("keyword1", new BytesRef("text1"))); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory)); + TestUtil.checkReader(ir); + assertEquals(1, ir.leaves().size()); + + // Segment documents + /** + * sndv dv field + * [1, 1, -1] + * [1, 1, -1] + * [2, 2, -2] + * [2, 2, -2] + */ + // Star tree documents + /** + * sndv dv | [ sum, value_count, min, max[sndv]] , doc_count + [0, 0] | [2.0, 1.0, 2.0, 2.0, 1.0] + [1, 1] | [2.0, 1.0, 2.0, 2.0, 1.0] + [null, 0] | [1.0, 1.0, 1.0, 1.0, 1.0] + [null, 1] | [1.0, 1.0, 1.0, 1.0, 1.0] + [null, 0] | [3.0, 2.0, 1.0, 2.0, 2.0] + [null, 1] | [3.0, 2.0, 1.0, 2.0, 2.0] + [null, null] | [6.0, 4.0, 1.0, 2.0, 4.0] + [null, null] | [2.0, 2.0, 1.0, 1.0, 2.0] + */ + StarTreeDocument[] expectedStarTreeDocuments = new StarTreeDocument[3]; + expectedStarTreeDocuments[0] = new StarTreeDocument(new Long[] { 0L, null }, new Double[] { 2.0, 1.0, 2.0, 2.0, 1.0 }); + expectedStarTreeDocuments[1] = new StarTreeDocument(new Long[] { null, null }, new Double[] { 4.0, 3.0, 1.0, 2.0, 3.0 }); + expectedStarTreeDocuments[2] = new StarTreeDocument(new Long[] { null, null }, new Double[] { 6.0, 4.0, 1.0, 2.0, 4.0 }); + + for (LeafReaderContext context : ir.leaves()) { + SegmentReader reader = Lucene.segmentReader(context.reader()); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + + for (CompositeIndexFieldInfo compositeIndexFieldInfo : compositeIndexFields) { + StarTreeValues starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo); + StarTreeDocument[] starTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments( + List.of(starTreeValues), + List.of( + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG + ), + Integer.parseInt(starTreeValues.getAttributes().get(STAR_TREE_DOCS_COUNT)) + ); + assertStarTreeDocuments(starTreeDocuments, expectedStarTreeDocuments); + } + } + ir.close(); + directory.close(); + } + + @Override + protected XContentBuilder getExpandedMapping() throws IOException { + return topMapping(b -> { + b.startObject("composite"); + b.startObject("startree"); + b.field("type", "star_tree"); + b.startObject("config"); + b.field("max_leaf_docs", 1); + b.startArray("ordered_dimensions"); + b.startObject(); + b.field("name", "keyword1"); + b.endObject(); + b.startObject(); + b.field("name", "keyword2"); + b.endObject(); + b.endArray(); + b.startArray("metrics"); + b.startObject(); + b.field("name", "sndv"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("avg"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.endArray(); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("properties"); + b.startObject("sndv"); + b.field("type", "integer"); + b.endObject(); + b.startObject("keyword1"); + b.field("type", "keyword"); + b.endObject(); + b.startObject("keyword2"); + b.field("type", "keyword"); + b.endObject(); + b.endObject(); + }); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BuilderTestsUtils.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BuilderTestsUtils.java index b6f91538dddb1..f9c5bb3e86975 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BuilderTestsUtils.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BuilderTestsUtils.java @@ -18,11 +18,13 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.Version; import org.opensearch.index.codec.composite.LuceneDocValuesProducerFactory; @@ -150,6 +152,61 @@ public long cost() { }; } + public static SortedSetDocValues getSortedSetMock(List dimList, List docsWithField) { + return new SortedSetDocValues() { + int index = -1; + + @Override + public long nextOrd() throws IOException { + return dimList.get(index); + } + + @Override + public int docValueCount() { + return 1; + } + + @Override + public BytesRef lookupOrd(long l) throws IOException { + return new BytesRef("dummy" + l); + } + + @Override + public long getValueCount() { + return 0; + } + + @Override + public boolean advanceExact(int target) { + return false; + } + + @Override + public int docID() { + return index; + } + + @Override + public int nextDoc() { + if (index == docsWithField.size() - 1) { + return NO_MORE_DOCS; + } + index++; + return docsWithField.get(index); + } + + @Override + public int advance(int target) { + return 0; + } + + @Override + public long cost() { + return 0; + } + }; + } + public static void validateStarTree( InMemoryTreeNode root, int totalDimensions, @@ -409,7 +466,7 @@ public static SegmentReadState getReadState( false, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, - DocValuesType.SORTED_NUMERIC, + dimensionFields.get(dimension), -1, Collections.emptyMap(), 0, diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilderFlushFlowTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilderFlushFlowTests.java index e9a61c1f1782f..b70ae2247aadc 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilderFlushFlowTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilderFlushFlowTests.java @@ -14,11 +14,13 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.IndexInput; import org.opensearch.index.codec.composite.LuceneDocValuesConsumerFactory; import org.opensearch.index.codec.composite.composite912.Composite912DocValuesFormat; import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.KeywordDimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.NumericDimension; @@ -28,6 +30,7 @@ import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedSetStarTreeValuesIterator; import java.io.IOException; import java.util.ArrayList; @@ -40,6 +43,7 @@ import java.util.concurrent.atomic.AtomicInteger; import static org.opensearch.index.compositeindex.datacube.startree.builder.BuilderTestsUtils.getSortedNumericMock; +import static org.opensearch.index.compositeindex.datacube.startree.builder.BuilderTestsUtils.getSortedSetMock; import static org.opensearch.index.compositeindex.datacube.startree.builder.BuilderTestsUtils.validateStarTree; import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE; @@ -406,6 +410,115 @@ public void testFlushFlowWithTimestamps() throws IOException { validateStarTree(builder.getRootNode(), 3, 10, builder.getStarTreeDocuments()); } + public void testFlushFlowForKeywords() throws IOException { + List dimList = List.of(0L, 1L, 2L, 3L, 4L, 5L); + List docsWithField = List.of(0, 1, 2, 3, 4, 5); + List dimList2 = List.of(0L, 1L, 2L, 3L, 4L, 5L); + List docsWithField2 = List.of(0, 1, 2, 3, 4, 5); + + List metricsList = List.of( + getLongFromDouble(0.0), + getLongFromDouble(10.0), + getLongFromDouble(20.0), + getLongFromDouble(30.0), + getLongFromDouble(40.0), + getLongFromDouble(50.0) + ); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5); + + compositeField = getStarTreeFieldWithKeywordField(); + SortedSetStarTreeValuesIterator d1sndv = new SortedSetStarTreeValuesIterator(getSortedSetMock(dimList, docsWithField)); + SortedSetStarTreeValuesIterator d2sndv = new SortedSetStarTreeValuesIterator(getSortedSetMock(dimList2, docsWithField2)); + SortedNumericStarTreeValuesIterator m1sndv = new SortedNumericStarTreeValuesIterator( + getSortedNumericMock(metricsList, metricsWithField) + ); + SortedNumericStarTreeValuesIterator m2sndv = new SortedNumericStarTreeValuesIterator( + getSortedNumericMock(metricsList, metricsWithField) + ); + + writeState = getWriteState(6, writeState.segmentInfo.getId()); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; + Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( + dimDvs, + List.of(new SequentialDocValuesIterator(m1sndv), new SequentialDocValuesIterator(m2sndv)) + ); + /** + * Asserting following dim / metrics [ dim1, dim2 / Sum [metric], count [metric] ] + [0, 0] | [0.0, 1] + [1, 1] | [10.0, 1] + [3, 3] | [30.0, 1] + [4, 4] | [40.0, 1] + [5, 5] | [50.0, 1] + [null, 2] | [20.0, 1] + */ + + SegmentWriteState w = getWriteState(DocIdSetIterator.NO_MORE_DOCS, writeState.segmentInfo.getId()); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + w, + Composite912DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite912DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite912DocValuesFormat.META_DOC_VALUES_CODEC, + Composite912DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + Map dv = new LinkedHashMap<>(); + dv.put("field1", getSortedSetMock(dimList, docsWithField)); + dv.put("field3", getSortedSetMock(dimList2, docsWithField2)); + builder.setSortedSetDocValuesMap(dv); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); + + List starTreeDocuments = builder.getStarTreeDocuments(); + int count = 0; + for (StarTreeDocument starTreeDocument : starTreeDocuments) { + count++; + if (starTreeDocument.dimensions[1] != null) { + assertEquals( + starTreeDocument.dimensions[0] == null + ? starTreeDocument.dimensions[1] * 1 * 10.0 + : starTreeDocument.dimensions[0] * 10, + starTreeDocument.metrics[0] + ); + assertEquals(1L, starTreeDocument.metrics[1]); + } else { + assertEquals(150D, starTreeDocument.metrics[0]); + assertEquals(6L, starTreeDocument.metrics[1]); + } + } + assertEquals(13, count); + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + LinkedHashMap docValues = new LinkedHashMap<>(); + docValues.put("field1", DocValuesType.SORTED_SET); + docValues.put("field3", DocValuesType.SORTED_SET); + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + docValues, + List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT, MetricStat.AVG))), + 6, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 264 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); + + } + private StarTreeField getStarTreeFieldWithMultipleMetrics() { Dimension d1 = new NumericDimension("field1"); Dimension d2 = new NumericDimension("field3"); @@ -418,6 +531,18 @@ private StarTreeField getStarTreeFieldWithMultipleMetrics() { return new StarTreeField("sf", dims, metrics, c); } + private StarTreeField getStarTreeFieldWithKeywordField() { + Dimension d1 = new KeywordDimension("field1"); + Dimension d2 = new KeywordDimension("field3"); + Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); + Metric m2 = new Metric("field2", List.of(MetricStat.VALUE_COUNT)); + Metric m3 = new Metric("field2", List.of(MetricStat.AVG)); + List dims = List.of(d1, d2); + List metrics = List.of(m1, m2, m3); + StarTreeFieldConfiguration c = new StarTreeFieldConfiguration(1000, new HashSet<>(), getBuildMode()); + return new StarTreeField("sf", dims, metrics, c); + } + private static DocValuesProducer getDocValuesProducer(SortedNumericDocValues sndv) { return new EmptyDocValuesProducer() { @Override