From 811386e7b6655d421f2c3f67d3413ac2a42dd363 Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Wed, 24 Jul 2024 13:53:19 +0530 Subject: [PATCH 1/3] star tree file formats Signed-off-by: Sarthak Aggarwal --- .../Lucene90DocValuesConsumerWrapper.java | 68 ++ .../Lucene90DocValuesProducerWrapper.java | 85 ++ .../SortedNumericDocValuesWriterWrapper.java | 53 + .../composite/Composite99DocValuesFormat.java | 30 + .../composite/Composite99DocValuesReader.java | 303 ++++- .../composite/Composite99DocValuesWriter.java | 90 +- .../CompositeIndexConstants.java | 31 + .../CompositeIndexMetadata.java | 53 + .../compositeindex/datacube/MetricStat.java | 27 +- .../datacube/ReadDimension.java | 56 + .../startree/StarTreeFieldConfiguration.java | 22 +- .../aggregators/CountValueAggregator.java | 5 + .../aggregators/MaxValueAggregator.java | 0 .../aggregators/MetricAggregatorInfo.java | 9 +- .../aggregators/SumValueAggregator.java | 5 + .../startree/aggregators/ValueAggregator.java | 5 + .../startree/builder/BaseStarTreeBuilder.java | 393 ++++-- .../builder/OnHeapStarTreeBuilder.java | 28 +- .../startree/builder/StarTreeBuilder.java | 22 +- .../startree/builder/StarTreesBuilder.java | 50 +- .../datacube/startree/meta/MetricEntry.java | 33 + .../startree/meta/StarTreeMetadata.java | 230 ++++ .../datacube/startree/meta/TreeMetadata.java | 111 ++ .../datacube/startree/meta/package-info.java | 14 + .../node/FixedLengthStarTreeNode.java | 186 +++ .../datacube/startree/node/StarTree.java | 62 + .../datacube/startree/node/StarTreeNode.java | 11 +- .../startree/node/StarTreeNodeType.java | 90 ++ .../datacube/startree/node/Tree.java | 23 + .../datacube/startree/node/package-info.java | 2 + .../utils/SequentialDocValuesIterator.java | 52 +- .../startree/utils/StarTreeDataWriter.java | 141 +++ .../startree/utils/StarTreeMetaWriter.java | 167 +++ .../startree/utils/StarTreeUtils.java | 161 +++ .../datacube/startree/utils/TreeNode.java | 16 +- .../index/compositeindex/package-info.java | 3 +- .../StarTreeDocValuesFormatTests.java | 38 + .../builder/BaseStarTreeBuilderTests.java | 41 +- .../builder/OnHeapStarTreeBuilderTests.java | 1077 ++++++++++++++++- .../builder/StarTreesBuilderTests.java | 12 +- .../startree/meta/StarTreeMetaTests.java | 218 ++++ .../node/FixedLengthStarTreeNodeTests.java | 208 ++++ .../opensearch/test/OpenSearchTestCase.java | 8 + 43 files changed, 4038 insertions(+), 201 deletions(-) create mode 100644 server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumerWrapper.java create mode 100644 server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducerWrapper.java create mode 100644 server/src/main/java/org/apache/lucene/index/SortedNumericDocValuesWriterWrapper.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/package-info.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java diff --git a/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumerWrapper.java b/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumerWrapper.java new file mode 100644 index 0000000000000..efd6d844a6907 --- /dev/null +++ b/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumerWrapper.java @@ -0,0 +1,68 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.codecs.lucene90; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.SegmentWriteState; + +import java.io.IOException; + +/** + * This class is an abstraction of the {@link DocValuesConsumer} for the Star Tree index structure. + * It is responsible to consume various types of document values (numeric, binary, sorted, sorted numeric, + * and sorted set) for fields in the Star Tree index. + * + * @opensearch.experimental + */ +public class Lucene90DocValuesConsumerWrapper extends DocValuesConsumer { + + Lucene90DocValuesConsumer lucene90DocValuesConsumer; + + public Lucene90DocValuesConsumerWrapper( + SegmentWriteState state, + String dataCodec, + String dataExtension, + String metaCodec, + String metaExtension + ) throws IOException { + lucene90DocValuesConsumer = new Lucene90DocValuesConsumer(state, dataCodec, dataExtension, metaCodec, metaExtension); + } + + @Override + public void close() throws IOException { + lucene90DocValuesConsumer.close(); + } + + @Override + public void addNumericField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException { + lucene90DocValuesConsumer.addNumericField(fieldInfo, docValuesProducer); + } + + @Override + public void addBinaryField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException { + lucene90DocValuesConsumer.addNumericField(fieldInfo, docValuesProducer); + } + + @Override + public void addSortedField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException { + lucene90DocValuesConsumer.addSortedField(fieldInfo, docValuesProducer); + } + + @Override + public void addSortedNumericField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException { + lucene90DocValuesConsumer.addSortedNumericField(fieldInfo, docValuesProducer); + } + + @Override + public void addSortedSetField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException { + lucene90DocValuesConsumer.addSortedSetField(fieldInfo, docValuesProducer); + } +} diff --git a/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducerWrapper.java b/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducerWrapper.java new file mode 100644 index 0000000000000..669e85181082e --- /dev/null +++ b/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducerWrapper.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.codecs.lucene90; + +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; + +import java.io.IOException; + +/** + * This class is a custom abstraction of the {@link DocValuesProducer} for the Star Tree index structure. + * It is responsible for providing access to various types of document values (numeric, binary, sorted, sorted numeric, + * and sorted set) for fields in the Star Tree index. + * + * @opensearch.experimental + */ +public class Lucene90DocValuesProducerWrapper extends DocValuesProducer { + + Lucene90DocValuesProducer lucene90DocValuesProducer; + SegmentReadState state; + + public Lucene90DocValuesProducerWrapper( + SegmentReadState state, + String dataCodec, + String dataExtension, + String metaCodec, + String metaExtension + ) throws IOException { + lucene90DocValuesProducer = new Lucene90DocValuesProducer(state, dataCodec, dataExtension, metaCodec, metaExtension); + this.state = state; + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getNumeric(field); + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getBinary(field); + } + + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getSorted(field); + } + + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getSortedNumeric(field); + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getSortedSet(field); + } + + @Override + public void checkIntegrity() throws IOException { + this.lucene90DocValuesProducer.checkIntegrity(); + } + + // returns the doc id set iterator based on field name + public SortedNumericDocValues getSortedNumeric(String fieldName) throws IOException { + return this.lucene90DocValuesProducer.getSortedNumeric(state.fieldInfos.fieldInfo(fieldName)); + } + + @Override + public void close() throws IOException { + this.lucene90DocValuesProducer.close(); + } + +} diff --git a/server/src/main/java/org/apache/lucene/index/SortedNumericDocValuesWriterWrapper.java b/server/src/main/java/org/apache/lucene/index/SortedNumericDocValuesWriterWrapper.java new file mode 100644 index 0000000000000..f7759fcced284 --- /dev/null +++ b/server/src/main/java/org/apache/lucene/index/SortedNumericDocValuesWriterWrapper.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.index; + +import org.apache.lucene.util.Counter; + +/** + * A wrapper class for writing sorted numeric doc values. + *

+ * This class provides a convenient way to add sorted numeric doc values to a field + * and retrieve the corresponding {@link SortedNumericDocValues} instance. + * + * @opensearch.experimental + */ +public class SortedNumericDocValuesWriterWrapper { + + private final SortedNumericDocValuesWriter sortedNumericDocValuesWriter; + + /** + * Sole constructor. Constructs a new {@link SortedNumericDocValuesWriterWrapper} instance. + * + * @param fieldInfo the field information for the field being written + * @param counter a counter for tracking memory usage + */ + public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter) { + sortedNumericDocValuesWriter = new SortedNumericDocValuesWriter(fieldInfo, counter); + } + + /** + * Adds a value to the sorted numeric doc values for the specified document. + * + * @param docID the document ID + * @param value the value to add + */ + public void addValue(int docID, long value) { + sortedNumericDocValuesWriter.addValue(docID, value); + } + + /** + * Returns the {@link SortedNumericDocValues} instance containing the sorted numeric doc values + * + * @return the {@link SortedNumericDocValues} instance + */ + public SortedNumericDocValues getDocValues() { + return sortedNumericDocValuesWriter.getDocValues(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java index 216ed4f68f333..d4b0c0d1f4b80 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java @@ -37,6 +37,36 @@ public class Composite99DocValuesFormat extends DocValuesFormat { private final DocValuesFormat delegate; private final MapperService mapperService; + /** Data codec name for Composite Doc Values Format */ + public static final String DATA_CODEC_NAME = "Composite99FormatData"; + + /** Meta codec name for Composite Doc Values Format */ + public static final String META_CODEC_NAME = "Composite99FormatMeta"; + + /** Filename extension for the composite index data */ + public static final String DATA_EXTENSION = "cid"; + + /** Filename extension for the composite index meta */ + public static final String META_EXTENSION = "cim"; + + /** Data doc values codec name for Composite Doc Values Format */ + public static final String DATA_DOC_VALUES_CODEC = "Composite99DocValuesData"; + + /** Meta doc values codec name for Composite Doc Values Format */ + public static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata"; + + /** Filename extension for the composite index data doc values */ + public static final String DATA_DOC_VALUES_EXTENSION = "cidvd"; + + /** Filename extension for the composite index meta doc values */ + public static final String META_DOC_VALUES_EXTENSION = "cidvm"; + + /** Initial version for the Composite90DocValuesFormat */ + public static final int VERSION_START = 0; + + /** Current version for the Composite90DocValuesFormat */ + public static final int VERSION_CURRENT = VERSION_START; + // needed for SPI public Composite99DocValuesFormat() { this(new Lucene90DocValuesFormat(), null); diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java index df5008a7f294e..8a2f08b9cb898 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java @@ -8,19 +8,56 @@ package org.opensearch.index.codec.composite; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducerWrapper; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.IndexInput; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.ReadDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.meta.MetricEntry; +import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTree; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.Tree; +import org.opensearch.index.mapper.CompositeMappedFieldType; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.SEGMENT_DOCS_COUNT; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.getFieldInfoList; /** * Reader for star tree index and star tree doc values from the segments @@ -29,11 +66,158 @@ */ @ExperimentalApi public class Composite99DocValuesReader extends DocValuesProducer implements CompositeIndexReader { - private DocValuesProducer delegate; + private static final Logger logger = LogManager.getLogger(Composite99DocValuesReader.class); + + private final DocValuesProducer delegate; + private IndexInput dataIn; + private ChecksumIndexInput metaIn; + private final Map compositeIndexInputMap = new LinkedHashMap<>(); + private final Map compositeIndexMetadataMap = new LinkedHashMap<>(); + private final List fields; + private Lucene90DocValuesProducerWrapper compositeDocValuesProducer; + private final List compositeFieldInfos = new ArrayList<>(); + private final SegmentReadState readState; - public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState state) throws IOException { + public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState readState) throws IOException { this.delegate = producer; - // TODO : read star tree files + this.readState = readState; + this.fields = new ArrayList<>(); + + String metaFileName = IndexFileNames.segmentFileName( + readState.segmentInfo.name, + readState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + + String dataFileName = IndexFileNames.segmentFileName( + readState.segmentInfo.name, + readState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + + boolean success = false; + try { + + // initialize meta input + dataIn = readState.directory.openInput(dataFileName, readState.context); + CodecUtil.checkIndexHeader( + dataIn, + Composite99DocValuesFormat.DATA_CODEC_NAME, + Composite99DocValuesFormat.VERSION_START, + Composite99DocValuesFormat.VERSION_CURRENT, + readState.segmentInfo.getId(), + readState.segmentSuffix + ); + + // initialize data input + metaIn = readState.directory.openChecksumInput(metaFileName, readState.context); + Throwable priorE = null; + try { + CodecUtil.checkIndexHeader( + metaIn, + Composite99DocValuesFormat.META_CODEC_NAME, + Composite99DocValuesFormat.VERSION_START, + Composite99DocValuesFormat.VERSION_CURRENT, + readState.segmentInfo.getId(), + readState.segmentSuffix + ); + + while (true) { + + // validate magic marker + long magicMarker = metaIn.readVLong(); + if (magicMarker == -1) { + logger.info("EOF reached for composite index metadata"); + break; + } else if (magicMarker < 0) { + throw new CorruptIndexException("Unknown token encountered: " + magicMarker, metaIn); + } else if (MAGIC_MARKER != magicMarker) { + logger.error("Invalid composite field magic marker"); + throw new IOException("Invalid composite field magic marker"); + } + + int version = metaIn.readVInt(); + if (VERSION != version) { + logger.error("Invalid composite field version"); + throw new IOException("Invalid composite field version"); + } + + // construct composite index metadata + String compositeFieldName = metaIn.readString(); + CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName( + metaIn.readString() + ); + + switch (compositeFieldType) { + case STAR_TREE: + StarTreeMetadata starTreeMetadata = new StarTreeMetadata(metaIn, compositeFieldName, compositeFieldType); + compositeFieldInfos.add(new CompositeIndexFieldInfo(compositeFieldName, compositeFieldType)); + + IndexInput starTreeIndexInput = dataIn.slice( + "star-tree data slice for respective star-tree fields", + starTreeMetadata.getDataStartFilePointer(), + starTreeMetadata.getDataLength() + ); + compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput); + compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata); + + List dimensionFieldNumbers = starTreeMetadata.getDimensionFieldNumbers(); + + // generating star tree unique fields (fully qualified name for dimension and metrics) + for (Integer fieldNumber : dimensionFieldNumbers) { + fields.add( + fullyQualifiedFieldNameForStarTreeDimensionsDocValues( + compositeFieldName, + readState.fieldInfos.fieldInfo(fieldNumber).getName() + ) + ); + } + + for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { + fields.add( + fullyQualifiedFieldNameForStarTreeMetricsDocValues( + compositeFieldName, + readState.fieldInfos.fieldInfo(metricEntry.getMetricFieldNumber()).getName(), + MetricStat.fromMetricOrdinal(metricEntry.getMetricStatOrdinal()).getTypeName() + ) + ); + } + + break; + default: + throw new CorruptIndexException("Invalid composite field type found in the file", dataIn); + } + } + + // populates the dummy list of field infos to fetch doc id set iterators for respective fields. + // the dummy field info is used to fetch the doc id set iterators for respective fields based on field name + FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields)); + SegmentReadState segmentReadState = new SegmentReadState( + readState.directory, + readState.segmentInfo, + fieldInfos, + readState.context + ); + + // initialize star-tree doc values producer + compositeDocValuesProducer = new Lucene90DocValuesProducerWrapper( + segmentReadState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + } catch (Throwable t) { + priorE = t; + } finally { + CodecUtil.checkFooter(metaIn, priorE); + } + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); + } + } } @Override @@ -64,24 +248,125 @@ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { @Override public void checkIntegrity() throws IOException { delegate.checkIntegrity(); - // Todo : check integrity of composite index related [star tree] files + CodecUtil.checksumEntireFile(dataIn); } @Override public void close() throws IOException { delegate.close(); - // Todo: close composite index related files [star tree] files + boolean success = false; + try { + IOUtils.close(metaIn, dataIn); + IOUtils.close(compositeDocValuesProducer); + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(metaIn, dataIn); + } + compositeIndexInputMap.clear(); + compositeIndexMetadataMap.clear(); + fields.clear(); + metaIn = null; + dataIn = null; + } } @Override public List getCompositeIndexFields() { - // todo : read from file formats and get the field names. - return new ArrayList<>(); + return compositeFieldInfos; } @Override public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo compositeIndexFieldInfo) throws IOException { - // TODO : read compositeIndexValues [starTreeValues] from star tree files - throw new UnsupportedOperationException(); + + switch (compositeIndexFieldInfo.getType()) { + case STAR_TREE: + // building star tree values + StarTreeMetadata starTreeMetadata = (StarTreeMetadata) compositeIndexMetadataMap.get(compositeIndexFieldInfo.getField()); + + // build skip star node dimensions + Set skipStarNodeCreationInDimsFieldNumbers = starTreeMetadata.getSkipStarNodeCreationInDims(); + Set skipStarNodeCreationInDims = new HashSet<>(); + for (Integer fieldNumber : skipStarNodeCreationInDimsFieldNumbers) { + skipStarNodeCreationInDims.add(readState.fieldInfos.fieldInfo(fieldNumber).getName()); + } + + // build dimensions + List dimensionFieldNumbers = starTreeMetadata.getDimensionFieldNumbers(); + List dimensions = new ArrayList<>(); + List readDimensions = new ArrayList<>(); + for (Integer fieldNumber : dimensionFieldNumbers) { + dimensions.add(readState.fieldInfos.fieldInfo(fieldNumber).getName()); + readDimensions.add(new ReadDimension(readState.fieldInfos.fieldInfo(fieldNumber).name)); + } + + // build metrics + Map starTreeMetricMap = new LinkedHashMap<>(); + for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { + String metricName = readState.fieldInfos.fieldInfo(metricEntry.getMetricFieldNumber()).getName(); + + Metric metric = starTreeMetricMap.computeIfAbsent(metricName, field -> new Metric(field, new ArrayList<>())); + metric.getMetrics().add(MetricStat.fromMetricOrdinal(metricEntry.getMetricStatOrdinal())); + } + List starTreeMetrics = new ArrayList<>(starTreeMetricMap.values()); + + // star-tree field + StarTreeField starTreeField = new StarTreeField( + starTreeMetadata.getCompositeFieldName(), + readDimensions, + starTreeMetrics, + new StarTreeFieldConfiguration( + starTreeMetadata.getMaxLeafDocs(), + skipStarNodeCreationInDims, + starTreeMetadata.getStarTreeBuildMode() + ) + ); + + // star-tree root node + IndexInput compositeIndexIn = compositeIndexInputMap.get(starTreeMetadata.getCompositeFieldName()); + Tree starTree = new StarTree(compositeIndexIn, starTreeMetadata); + StarTreeNode rootNode = starTree.getRoot(); + + // get doc id set iterators for metrics and dimensions + Map dimensionsDocIdSetIteratorMap = new LinkedHashMap<>(); + Map metricsDocIdSetIteratorMap = new LinkedHashMap<>(); + + // get doc id set iterators for dimensions + for (String dimension : dimensions) { + dimensionsDocIdSetIteratorMap.put( + dimension, + compositeDocValuesProducer.getSortedNumeric( + fullyQualifiedFieldNameForStarTreeDimensionsDocValues(starTreeField.getName(), dimension) + ) + ); + } + + // get doc id set iterators for metrics + for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { + String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTreeField.getName(), + readState.fieldInfos.fieldInfo(metricEntry.getMetricFieldNumber()).getName(), + MetricStat.fromMetricOrdinal(metricEntry.getMetricStatOrdinal()).getTypeName() + ); + metricsDocIdSetIteratorMap.put(metricFullName, compositeDocValuesProducer.getSortedNumeric(metricFullName)); + } + + // create star-tree attributes map + Map starTreeAttributes = new HashMap<>(); + starTreeAttributes.put(SEGMENT_DOCS_COUNT, String.valueOf(starTreeMetadata.getSegmentAggregatedDocCount())); + + // return star-tree values + return new StarTreeValues( + starTreeField, + rootNode, + dimensionsDocIdSetIteratorMap, + metricsDocIdSetIteratorMap, + starTreeAttributes + ); + + default: + throw new CorruptIndexException("Unsupported composite index field type: ", compositeIndexFieldInfo.getType().getName()); + } + } } diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java index 3859d3c998573..6daa7a90112db 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java @@ -10,24 +10,30 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumerWrapper; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.store.IndexOutput; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.builder.StarTreesBuilder; import org.opensearch.index.mapper.CompositeMappedFieldType; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.StarTreeMapper; import java.io.IOException; -import java.util.Collections; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -49,12 +55,17 @@ public class Composite99DocValuesWriter extends DocValuesConsumer { AtomicReference mergeState = new AtomicReference<>(); private final Set compositeMappedFieldTypes; private final Set compositeFieldSet; + private DocValuesConsumer composite99DocValuesConsumer; + + public IndexOutput dataOut; + public IndexOutput metaOut; private final Set segmentFieldSet; private final Map fieldProducerMap = new HashMap<>(); private static final Logger logger = LogManager.getLogger(Composite99DocValuesWriter.class); - public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService) { + public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService) + throws IOException { this.delegate = delegate; this.state = segmentWriteState; @@ -70,6 +81,51 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState for (CompositeMappedFieldType type : compositeMappedFieldTypes) { compositeFieldSet.addAll(type.fields()); } + + boolean success = false; + try { + this.composite99DocValuesConsumer = new Lucene90DocValuesConsumerWrapper( + segmentWriteState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + + String dataFileName = IndexFileNames.segmentFileName( + segmentWriteState.segmentInfo.name, + segmentWriteState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + dataOut = segmentWriteState.directory.createOutput(dataFileName, segmentWriteState.context); + CodecUtil.writeIndexHeader( + dataOut, + Composite99DocValuesFormat.DATA_CODEC_NAME, + Composite99DocValuesFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix + ); + + String metaFileName = IndexFileNames.segmentFileName( + segmentWriteState.segmentInfo.name, + segmentWriteState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + metaOut = segmentWriteState.directory.createOutput(metaFileName, segmentWriteState.context); + CodecUtil.writeIndexHeader( + metaOut, + Composite99DocValuesFormat.META_CODEC_NAME, + Composite99DocValuesFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix + ); + + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); + } + } } @Override @@ -104,6 +160,26 @@ public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) @Override public void close() throws IOException { delegate.close(); + boolean success = false; + try { + if (metaOut != null) { + metaOut.writeLong(-1); // write EOF marker + CodecUtil.writeFooter(metaOut); // write checksum + } + if (dataOut != null) { + CodecUtil.writeFooter(dataOut); // write checksum + } + + success = true; + } finally { + if (success) { + IOUtils.close(dataOut, metaOut, composite99DocValuesConsumer); + } else { + IOUtils.closeWhileHandlingException(dataOut, metaOut, composite99DocValuesConsumer); + } + metaOut = dataOut = null; + composite99DocValuesConsumer = null; + } } private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer, FieldInfo field) throws IOException { @@ -128,9 +204,9 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) { // we have all the required fields to build composite fields if (compositeFieldSet.isEmpty()) { for (CompositeMappedFieldType mappedType : compositeMappedFieldTypes) { - if (mappedType.getCompositeIndexType().equals(CompositeMappedFieldType.CompositeFieldType.STAR_TREE)) { + if (mappedType instanceof StarTreeMapper.StarTreeFieldType) { try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService)) { - starTreesBuilder.build(fieldProducerMap); + starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, composite99DocValuesConsumer); } } } @@ -147,6 +223,7 @@ public void merge(MergeState mergeState) throws IOException { /** * Merges composite fields from multiple segments + * * @param mergeState merge state */ private void mergeCompositeFields(MergeState mergeState) throws IOException { @@ -155,6 +232,7 @@ private void mergeCompositeFields(MergeState mergeState) throws IOException { /** * Merges star tree data fields from multiple segments + * * @param mergeState merge state */ private void mergeStarTreeFields(MergeState mergeState) throws IOException { @@ -177,7 +255,7 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException { CompositeIndexValues compositeIndexValues = reader.getCompositeIndexValues(fieldInfo); if (compositeIndexValues instanceof StarTreeValues) { StarTreeValues starTreeValues = (StarTreeValues) compositeIndexValues; - List fieldsList = starTreeSubsPerField.getOrDefault(fieldInfo.getField(), Collections.emptyList()); + List fieldsList = starTreeSubsPerField.getOrDefault(fieldInfo.getField(), new ArrayList<>()); if (starTreeField == null) { starTreeField = starTreeValues.getStarTreeField(); } @@ -196,7 +274,7 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException { } } try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService)) { - starTreesBuilder.buildDuringMerge(starTreeSubsPerField); + starTreesBuilder.buildDuringMerge(metaOut, dataOut, starTreeSubsPerField, composite99DocValuesConsumer); } } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java new file mode 100644 index 0000000000000..1b357254cfcc4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex; + +/** + * This class contains constants used in the Composite Index implementation. + */ +public class CompositeIndexConstants { + + /** + * The magic marker value used for sanity checks in the Composite Index implementation. + */ + public static final long MAGIC_MARKER = 0xC0950513F1E1DL; // Composite Field + + /** + * The version of the Composite Index implementation. + */ + public static final int VERSION = 0; + + /** + * Represents the key to fetch number of documents in a segment. + */ + public static final String SEGMENT_DOCS_COUNT = "segment_docs_count"; + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java new file mode 100644 index 0000000000000..6ba401afe0e6f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex; + +import org.opensearch.index.mapper.CompositeMappedFieldType; + +/** + * This class represents the metadata of a Composite Index, which includes information about + * the composite field name, type, and the specific metadata for the type of composite field + * (e.g., Tree metadata). + * + * @opensearch.experimental + */ +public class CompositeIndexMetadata { + + private final String compositeFieldName; + private final CompositeMappedFieldType.CompositeFieldType compositeFieldType; + + /** + * Constructs a CompositeIndexMetadata object with the provided composite field name and type. + * + * @param compositeFieldName the name of the composite field + * @param compositeFieldType the type of the composite field + */ + public CompositeIndexMetadata(String compositeFieldName, CompositeMappedFieldType.CompositeFieldType compositeFieldType) { + this.compositeFieldName = compositeFieldName; + this.compositeFieldType = compositeFieldType; + } + + /** + * Returns the name of the composite field. + * + * @return the composite field name + */ + public String getCompositeFieldName() { + return compositeFieldName; + } + + /** + * Returns the type of the composite field. + * + * @return the composite field type + */ + public CompositeMappedFieldType.CompositeFieldType getCompositeFieldType() { + return compositeFieldType; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java index fbde296b15f7e..632f8de21d26d 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java @@ -17,22 +17,28 @@ */ @ExperimentalApi public enum MetricStat { - COUNT("count"), - AVG("avg"), - SUM("sum"), - MIN("min"), - MAX("max"); + COUNT("count", 0), + AVG("avg", 1), + SUM("sum", 2), + MIN("min", 3), + MAX("max", 4); private final String typeName; + private final int metricOrdinal; - MetricStat(String typeName) { + MetricStat(String typeName, int metricOrdinal) { this.typeName = typeName; + this.metricOrdinal = metricOrdinal; } public String getTypeName() { return typeName; } + public int getMetricOrdinal() { + return metricOrdinal; + } + public static MetricStat fromTypeName(String typeName) { for (MetricStat metric : MetricStat.values()) { if (metric.getTypeName().equalsIgnoreCase(typeName)) { @@ -41,4 +47,13 @@ public static MetricStat fromTypeName(String typeName) { } throw new IllegalArgumentException("Invalid metric stat: " + typeName); } + + public static MetricStat fromMetricOrdinal(int metricOrdinal) { + for (MetricStat metric : MetricStat.values()) { + if (metric.getMetricOrdinal() == metricOrdinal) { + return metric; + } + } + throw new IllegalArgumentException("Invalid metric stat: " + metricOrdinal); + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java new file mode 100644 index 0000000000000..5b6a20b8d1078 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube; + +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.mapper.CompositeDataCubeFieldType; + +import java.io.IOException; +import java.util.Objects; + +/** + * Composite index merge dimension class + * + * @opensearch.experimental + */ +public class ReadDimension implements Dimension { + public static final String READ = "read"; + private final String field; + + public ReadDimension(String field) { + this.field = field; + } + + public String getField() { + return field; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject(); + builder.field(CompositeDataCubeFieldType.NAME, field); + builder.field(CompositeDataCubeFieldType.TYPE, READ); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ReadDimension dimension = (ReadDimension) o; + return Objects.equals(field, dimension.getField()); + } + + @Override + public int hashCode() { + return Objects.hash(field); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java index 755c064c2c60a..d732a8598d711 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java @@ -56,19 +56,25 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @ExperimentalApi public enum StarTreeBuildMode { // TODO : remove onheap support unless this proves useful - ON_HEAP("onheap"), - OFF_HEAP("offheap"); + ON_HEAP("onheap", (byte) 0), + OFF_HEAP("offheap", (byte) 1); private final String typeName; + private final byte buildModeOrdinal; - StarTreeBuildMode(String typeName) { + StarTreeBuildMode(String typeName, byte buildModeOrdinal) { this.typeName = typeName; + this.buildModeOrdinal = buildModeOrdinal; } public String getTypeName() { return typeName; } + public byte getBuildModeOrdinal() { + return buildModeOrdinal; + } + public static StarTreeBuildMode fromTypeName(String typeName) { for (StarTreeBuildMode starTreeBuildMode : StarTreeBuildMode.values()) { if (starTreeBuildMode.getTypeName().equalsIgnoreCase(typeName)) { @@ -77,6 +83,16 @@ public static StarTreeBuildMode fromTypeName(String typeName) { } throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid star tree build mode: [%s] ", typeName)); } + + public static StarTreeBuildMode fromBuildModeOrdinal(byte buildModeOrdinal) { + for (StarTreeBuildMode starTreeBuildMode : StarTreeBuildMode.values()) { + if (starTreeBuildMode.getBuildModeOrdinal() == buildModeOrdinal) { + return starTreeBuildMode; + } + } + throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid star tree build mode: [%s] ", buildModeOrdinal)); + } + } public int maxLeafDocs() { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java index 5390b6728b9b6..0eb5614e829ca 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java @@ -68,4 +68,9 @@ public Long toLongValue(Long value) { public Long toStarTreeNumericTypeValue(Long value) { return value; } + + @Override + public long getIdentityMetricValue() { + return 0L; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java index a9209a38eca82..b4327122d6fb7 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java @@ -79,7 +79,12 @@ public StarTreeNumericType getAggregatedValueType() { * @return field name with metric type and field */ public String toFieldName() { - return starFieldName + DELIMITER + field + DELIMITER + metricStat.getTypeName(); + return toFieldName(starFieldName, field, metricStat.getTypeName()); + + } + + public static String toFieldName(String starFieldName, String field, String metricName) { + return starFieldName + DELIMITER + field + DELIMITER + metricName; } @Override @@ -94,7 +99,7 @@ public boolean equals(Object obj) { } if (obj instanceof MetricAggregatorInfo) { MetricAggregatorInfo anotherPair = (MetricAggregatorInfo) obj; - return metricStat == anotherPair.metricStat && field.equals(anotherPair.field); + return metricStat.equals(anotherPair.metricStat) && field.equals(anotherPair.field); } return false; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java index 385549216e4d6..c76b668d2ba3e 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java @@ -103,4 +103,9 @@ public Double toStarTreeNumericTypeValue(Long value) { throw new IllegalStateException("Cannot convert " + value + " to sortable aggregation type", e); } } + + @Override + public long getIdentityMetricValue() { + return 0; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java index 93230ed012b13..f6e1761a8a85f 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java @@ -61,4 +61,9 @@ public interface ValueAggregator { * Converts an aggregated value from a Long type. */ A toStarTreeNumericTypeValue(Long rawValue); + + /** + * Fetches a value that does not alter the result of aggregations + */ + long getIdentityMetricValue(); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index 7187fade882ea..7cc0360f76e32 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -9,14 +9,20 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedNumericDocValuesWriterWrapper; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; -import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.Counter; +import org.apache.lucene.util.NumericUtils; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -25,7 +31,10 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; import org.opensearch.index.compositeindex.datacube.startree.aggregators.ValueAggregator; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.Mapper; @@ -43,8 +52,11 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; -import static org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode.ALL; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; /** * Builder for star tree. Defines the algorithm to construct star-tree @@ -74,19 +86,31 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder { private final StarTreeField starTreeField; private final MapperService mapperService; - private final SegmentWriteState state; + private final SegmentWriteState writeState; + + private final IndexOutput metaOut; + private final IndexOutput dataOut; static String NUM_SEGMENT_DOCS = "numSegmentDocs"; /** * Reads all the configuration related to dimensions and metrics, builds a star-tree based on the different construction parameters. * - * @param starTreeField holds the configuration for the star tree - * @param state stores the segment write state - * @param mapperService helps to find the original type of the field + * @param starTreeField holds the configuration for the star tree + * @param writeState stores the segment write writeState + * @param mapperService helps to find the original type of the field */ - protected BaseStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) { + protected BaseStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, + StarTreeField starTreeField, + SegmentWriteState writeState, + MapperService mapperService + ) { logger.debug("Building star tree : {}", starTreeField.getName()); + this.metaOut = metaOut; + this.dataOut = dataOut; + this.starTreeField = starTreeField; StarTreeFieldConfiguration starTreeFieldSpec = starTreeField.getStarTreeConfig(); @@ -94,9 +118,9 @@ protected BaseStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState sta this.numDimensions = dimensionsSplitOrder.size(); this.skipStarNodeCreationForDimensions = new HashSet<>(); - this.totalSegmentDocs = state.segmentInfo.maxDoc(); + this.totalSegmentDocs = writeState.segmentInfo.maxDoc(); this.mapperService = mapperService; - this.state = state; + this.writeState = writeState; Set skipStarNodeCreationForDimensions = starTreeFieldSpec.getSkipStarNodeCreationInDims(); @@ -141,6 +165,212 @@ public List generateMetricAggregatorInfos(MapperService ma return metricAggregatorInfos; } + /** + * Generates the configuration required to perform aggregation for all the metrics on a field + * + * @return list of MetricAggregatorInfo + */ + public List getMetricReaders(SegmentWriteState state, Map fieldProducerMap) + throws IOException { + + List metricReaders = new ArrayList<>(); + for (Metric metric : this.starTreeField.getMetrics()) { + for (MetricStat metricStat : metric.getMetrics()) { + SequentialDocValuesIterator metricReader; + FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField()); + if (metricStat != MetricStat.COUNT) { + metricReader = new SequentialDocValuesIterator( + fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo) + ); + } else { + metricReader = new SequentialDocValuesIterator(); + } + + metricReaders.add(metricReader); + } + } + return metricReaders; + } + + /** + * Builds the star tree from the original segment documents + * + * @param fieldProducerMap contain s the docValues producer to get docValues associated with each field + * @param fieldNumberAcrossStarTrees maintains a counter for the number of star-tree fields + * @param starTreeDocValuesConsumer consumes the generated star-tree docValues + * @throws IOException when we are unable to build star-tree + */ + public void build( + Map fieldProducerMap, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { + long startTime = System.currentTimeMillis(); + logger.debug("Star-tree build is a go with star tree field {}", starTreeField.getName()); + + List metricReaders = getMetricReaders(writeState, fieldProducerMap); + List dimensionsSplitOrder = starTreeField.getDimensionsOrder(); + SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[dimensionsSplitOrder.size()]; + for (int i = 0; i < numDimensions; i++) { + String dimension = dimensionsSplitOrder.get(i).getField(); + FieldInfo dimensionFieldInfo = writeState.fieldInfos.fieldInfo(dimension); + if (dimensionFieldInfo == null) { + dimensionFieldInfo = getFieldInfo(dimension); + } + dimensionReaders[i] = new SequentialDocValuesIterator( + fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo) + ); + } + Iterator starTreeDocumentIterator = sortAndAggregateSegmentDocuments(dimensionReaders, metricReaders); + logger.debug("Sorting and aggregating star-tree in ms : {}", (System.currentTimeMillis() - startTime)); + build(starTreeDocumentIterator, fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); + logger.debug("Finished Building star-tree in ms : {}", (System.currentTimeMillis() - startTime)); + } + + /** + * Builds the star tree using sorted and aggregated star-tree Documents + * + * @param starTreeDocumentIterator contains the sorted and aggregated documents + * @param fieldNumberAcrossStarTrees maintains a counter for the number of star-tree fields + * @param starTreeDocValuesConsumer consumes the generated star-tree docValues + * @throws IOException when we are unable to build star-tree + */ + public void build( + Iterator starTreeDocumentIterator, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { + int numSegmentStarTreeDocument = totalSegmentDocs; + + while (starTreeDocumentIterator.hasNext()) { + appendToStarTree(starTreeDocumentIterator.next()); + } + int numStarTreeDocument = numStarTreeDocs; + logger.debug("Generated star tree docs : [{}] from segment docs : [{}]", numStarTreeDocument, numSegmentStarTreeDocument); + + if (numStarTreeDocs == 0) { + // serialize the star tree data + serializeStarTree(numSegmentStarTreeDocument); + return; + } + + constructStarTree(rootNode, 0, numStarTreeDocs); + int numStarTreeDocumentUnderStarNode = numStarTreeDocs - numStarTreeDocument; + logger.debug( + "Finished constructing star-tree, got [ {} ] tree nodes and [ {} ] starTreeDocument under star-node", + numStarTreeNodes, + numStarTreeDocumentUnderStarNode + ); + + createAggregatedDocs(rootNode); + int numAggregatedStarTreeDocument = numStarTreeDocs - numStarTreeDocument - numStarTreeDocumentUnderStarNode; + logger.debug("Finished creating aggregated documents : {}", numAggregatedStarTreeDocument); + + // Create doc values indices in disk + createSortedDocValuesIndices(starTreeDocValuesConsumer, fieldNumberAcrossStarTrees); + + // serialize star-tree + serializeStarTree(numSegmentStarTreeDocument); + } + + private void serializeStarTree(int numSegmentStarTreeDocument) throws IOException { + // serialize the star tree data + long dataFilePointer = dataOut.getFilePointer(); + long totalStarTreeDataLength = StarTreeUtils.writeStarTree(dataOut, rootNode, numStarTreeNodes, starTreeField.getName()); + + // serialize the star tree meta + StarTreeUtils.writeStarTreeMetadata( + metaOut, + starTreeField, + writeState, + metricAggregatorInfos, + numSegmentStarTreeDocument, + dataFilePointer, + totalStarTreeDataLength + ); + } + + private void createSortedDocValuesIndices(DocValuesConsumer docValuesConsumer, AtomicInteger fieldNumberAcrossStarTrees) + throws IOException { + List dimensionWriters = new ArrayList<>(); + List metricWriters = new ArrayList<>(); + FieldInfo[] dimensionFieldInfoList = new FieldInfo[starTreeField.getDimensionsOrder().size()]; + FieldInfo[] metricFieldInfoList = new FieldInfo[metricAggregatorInfos.size()]; + for (int i = 0; i < dimensionFieldInfoList.length; i++) { + final FieldInfo fi = StarTreeUtils.getFieldInfo( + fullyQualifiedFieldNameForStarTreeDimensionsDocValues( + starTreeField.getName(), + starTreeField.getDimensionsOrder().get(i).getField() + ), + fieldNumberAcrossStarTrees.getAndIncrement() + ); + dimensionFieldInfoList[i] = fi; + dimensionWriters.add(new SortedNumericDocValuesWriterWrapper(fi, Counter.newCounter())); + } + for (int i = 0; i < metricAggregatorInfos.size(); i++) { + + final FieldInfo fi = StarTreeUtils.getFieldInfo( + fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTreeField.getName(), + metricAggregatorInfos.get(i).getField(), + metricAggregatorInfos.get(i).getMetricStat().getTypeName() + ), + fieldNumberAcrossStarTrees.getAndIncrement() + ); + + metricFieldInfoList[i] = fi; + metricWriters.add(new SortedNumericDocValuesWriterWrapper(fi, Counter.newCounter())); + } + + for (int docId = 0; docId < numStarTreeDocs; docId++) { + StarTreeDocument starTreeDocument = getStarTreeDocumentForCreatingDocValues(docId); + for (int i = 0; i < starTreeDocument.dimensions.length; i++) { + Long val = starTreeDocument.dimensions[i]; + if (val != null) { + dimensionWriters.get(i).addValue(docId, val); + } + } + + for (int i = 0; i < starTreeDocument.metrics.length; i++) { + try { + switch (metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType()) { + case LONG: + metricWriters.get(i).addValue(docId, (Long) starTreeDocument.metrics[i]); + break; + case DOUBLE: + metricWriters.get(i).addValue(docId, NumericUtils.doubleToSortableLong((Double) starTreeDocument.metrics[i])); + break; + default: + throw new IllegalStateException("Unknown metric doc value type"); + } + } catch (IllegalArgumentException e) { + logger.info("could not parse the value, exiting creation of star tree"); + } + } + } + + addStarTreeDocValueFields(docValuesConsumer, dimensionWriters, dimensionFieldInfoList, starTreeField.getDimensionsOrder().size()); + addStarTreeDocValueFields(docValuesConsumer, metricWriters, metricFieldInfoList, metricAggregatorInfos.size()); + } + + private void addStarTreeDocValueFields( + DocValuesConsumer docValuesConsumer, + List docValuesWriters, + FieldInfo[] fieldInfoList, + int fieldCount + ) throws IOException { + for (int i = 0; i < fieldCount; i++) { + final int increment = i; + DocValuesProducer docValuesProducer = new EmptyDocValuesProducer() { + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) { + return docValuesWriters.get(increment).getDocValues(); + } + }; + docValuesConsumer.addSortedNumericField(fieldInfoList[i], docValuesProducer); + } + } + /** * Adds a document to the star-tree. * @@ -158,6 +388,15 @@ public List generateMetricAggregatorInfos(MapperService ma */ public abstract StarTreeDocument getStarTreeDocument(int docId) throws IOException; + /** + * Returns the star-tree document for the given doc id while creating doc values + * + * @param docId document id + * @return star tree document + * @throws IOException if an I/O error occurs while fetching the star-tree document + */ + public abstract StarTreeDocument getStarTreeDocumentForCreatingDocValues(int docId) throws IOException; + /** * Retrieves the list of star-tree documents in the star-tree. * @@ -179,7 +418,7 @@ public List generateMetricAggregatorInfos(MapperService ma * aggregated star-tree documents. * * @param dimensionReaders List of docValues readers to read dimensions from the segment - * @param metricReaders List of docValues readers to read metrics from the segment + * @param metricReaders List of docValues readers to read metrics from the segment * @return Iterator for the aggregated star-tree document */ public abstract Iterator sortAndAggregateSegmentDocuments( @@ -200,7 +439,6 @@ public abstract Iterator generateStarTreeDocumentsForStarNode( /** * Returns the star-tree document from the segment based on the current doc id - * */ protected StarTreeDocument getSegmentStarTreeDocument( int currentDocId, @@ -290,7 +528,7 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( if (isMerge) { metrics[i] = metricValueAggregator.getInitialAggregatedValue(segmentDocument.metrics[i]); } else { - metrics[i] = metricValueAggregator.getInitialAggregatedValueForSegmentDocValue(getLong(segmentDocument.metrics[i])); + metrics[i] = metricValueAggregator.getInitialAggregatedValueForSegmentDocValue(getLong(segmentDocument.metrics[i], metricValueAggregator.getIdentityMetricValue())); } } catch (Exception e) { @@ -311,7 +549,7 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( } else { aggregatedSegmentDocument.metrics[i] = metricValueAggregator.mergeAggregatedValueAndSegmentValue( aggregatedSegmentDocument.metrics[i], - getLong(segmentDocument.metrics[i]) + getLong(segmentDocument.metrics[i], metricValueAggregator.getIdentityMetricValue()) ); } } catch (Exception e) { @@ -326,27 +564,26 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( /** * Safely converts the metric value of object type to long. * - * @param metric value of the metric + * @param metric value of the metric + * @param identityMetricValue value of the metric which does not really affect the aggregations * @return converted metric value to long */ - private static long getLong(Object metric) { + private static long getLong(Object metric, long identityMetricValue) { - Long metricValue = null; + long metricValue; try { if (metric instanceof Long) { metricValue = (long) metric; } else if (metric != null) { - metricValue = Long.valueOf(String.valueOf(metric)); + metricValue = Long.parseLong(String.valueOf(metric)); + } else { + logger.debug("metric value is null, returning identity metric value for the aggregator"); + return identityMetricValue; } } catch (Exception e) { throw new IllegalStateException("unable to cast segment metric", e); } - if (metricValue == null) { - return 0; - // TODO: handle this properly - // throw new IllegalStateException("unable to cast segment metric"); - } return metricValue; } @@ -389,40 +626,6 @@ public StarTreeDocument reduceStarTreeDocuments(StarTreeDocument aggregatedDocum return aggregatedDocument; } } - - /** - * Builds the star tree from the original segment documents - * - * @param fieldProducerMap contain s the docValues producer to get docValues associated with each field - * - * @throws IOException when we are unable to build star-tree - */ - public void build(Map fieldProducerMap) throws IOException { - long startTime = System.currentTimeMillis(); - logger.debug("Star-tree build is a go with star tree field {}", starTreeField.getName()); - if (totalSegmentDocs == 0) { - logger.debug("No documents found in the segment"); - return; - } - List metricReaders = getMetricReaders(state, fieldProducerMap); - List dimensionsSplitOrder = starTreeField.getDimensionsOrder(); - SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[dimensionsSplitOrder.size()]; - for (int i = 0; i < numDimensions; i++) { - String dimension = dimensionsSplitOrder.get(i).getField(); - FieldInfo dimensionFieldInfo = state.fieldInfos.fieldInfo(dimension); - if (dimensionFieldInfo == null) { - dimensionFieldInfo = getFieldInfo(dimension); - } - dimensionReaders[i] = new SequentialDocValuesIterator( - fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo) - ); - } - Iterator starTreeDocumentIterator = sortAndAggregateSegmentDocuments(dimensionReaders, metricReaders); - logger.debug("Sorting and aggregating star-tree in ms : {}", (System.currentTimeMillis() - startTime)); - build(starTreeDocumentIterator); - logger.debug("Finished Building star-tree in ms : {}", (System.currentTimeMillis() - startTime)); - } - private static FieldInfo getFieldInfo(String field) { return new FieldInfo( field, @@ -445,73 +648,6 @@ private static FieldInfo getFieldInfo(String field) { ); } - /** - * Generates the configuration required to perform aggregation for all the metrics on a field - * - * @return list of MetricAggregatorInfo - */ - public List getMetricReaders(SegmentWriteState state, Map fieldProducerMap) - throws IOException { - List metricReaders = new ArrayList<>(); - for (Metric metric : this.starTreeField.getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { - FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField()); - if (metricFieldInfo == null) { - metricFieldInfo = getFieldInfo(metric.getField()); - } - // TODO - // if (metricStat != MetricStat.COUNT) { - // Need not initialize the metric reader for COUNT metric type - SequentialDocValuesIterator metricReader = new SequentialDocValuesIterator( - fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo) - ); - // } - - metricReaders.add(metricReader); - } - } - return metricReaders; - } - - /** - * Builds the star tree using Star-Tree Document - * - * @param starTreeDocumentIterator contains the sorted and aggregated documents - * @throws IOException when we are unable to build star-tree - */ - void build(Iterator starTreeDocumentIterator) throws IOException { - int numSegmentStarTreeDocument = totalSegmentDocs; - - while (starTreeDocumentIterator.hasNext()) { - appendToStarTree(starTreeDocumentIterator.next()); - } - int numStarTreeDocument = numStarTreeDocs; - logger.debug("Generated star tree docs : [{}] from segment docs : [{}]", numStarTreeDocument, numSegmentStarTreeDocument); - - if (numStarTreeDocs == 0) { - // TODO: Uncomment when segment codec and file formats is ready - // StarTreeBuilderUtils.serializeTree(indexOutput, rootNode, dimensionsSplitOrder, numNodes); - return; - } - - constructStarTree(rootNode, 0, numStarTreeDocs); - int numStarTreeDocumentUnderStarNode = numStarTreeDocs - numStarTreeDocument; - logger.debug( - "Finished constructing star-tree, got [ {} ] tree nodes and [ {} ] starTreeDocument under star-node", - numStarTreeNodes, - numStarTreeDocumentUnderStarNode - ); - - createAggregatedDocs(rootNode); - int numAggregatedStarTreeDocument = numStarTreeDocs - numStarTreeDocument - numStarTreeDocumentUnderStarNode; - logger.debug("Finished creating aggregated documents : {}", numAggregatedStarTreeDocument); - - // TODO: When StarTree Codec is ready - // Create doc values indices in disk - // Serialize and save in disk - // Write star tree metadata for off heap implementation - } - /** * Adds a document to star-tree * @@ -584,7 +720,12 @@ private Map constructNonStarNodes(int startDocId, int endDocId, if (Objects.equals(dimensionValue, nodeDimensionValue) == false) { TreeNode child = getNewNode(); child.dimensionId = dimensionId; - child.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL; + if (nodeDimensionValue == null) { + child.dimensionValue = ALL; + child.nodeType = StarTreeNodeType.NULL.getValue(); + } else { + child.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL; + } child.startDocId = nodeStartDocId; child.endDocId = i; nodes.put(nodeDimensionValue, child); @@ -615,7 +756,7 @@ private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId TreeNode starNode = getNewNode(); starNode.dimensionId = dimensionId; starNode.dimensionValue = ALL; - starNode.isStarNode = true; + starNode.nodeType = StarTreeNodeType.STAR.getValue(); starNode.startDocId = numStarTreeDocs; Iterator starTreeDocumentIterator = generateStarTreeDocumentsForStarNode(startDocId, endDocId, dimensionId); while (starTreeDocumentIterator.hasNext()) { @@ -660,7 +801,7 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException if (node.children.containsKey((long) ALL)) { // If it has star child, use the star child aggregated document directly for (TreeNode child : node.children.values()) { - if (child.isStarNode) { + if (child.nodeType == StarTreeNodeType.STAR.getValue()) { aggregatedStarTreeDocument = createAggregatedDocs(child); node.aggregatedDocId = child.aggregatedDocId; } else { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java index 1599be2e76a56..fec001922edc2 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java @@ -7,8 +7,10 @@ */ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexOutput; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.Dimension; @@ -24,6 +26,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; /** * On heap single tree builder @@ -42,8 +45,14 @@ public class OnHeapStarTreeBuilder extends BaseStarTreeBuilder { * @param segmentWriteState segment write state * @param mapperService helps with the numeric type of field */ - public OnHeapStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState segmentWriteState, MapperService mapperService) { - super(starTreeField, segmentWriteState, mapperService); + public OnHeapStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, + StarTreeField starTreeField, + SegmentWriteState segmentWriteState, + MapperService mapperService + ) throws IOException { + super(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); } @Override @@ -84,12 +93,16 @@ public Iterator sortAndAggregateSegmentDocuments( // TODO : we can save empty iterator for dimensions which are not part of segment starTreeDocuments[currentDocId] = getSegmentStarTreeDocument(currentDocId, dimensionReaders, metricReaders); } - return sortAndAggregateStarTreeDocuments(starTreeDocuments); + return sortAndAggregateStarTreeDocuments(starTreeDocuments, false); } @Override - public void build(List starTreeValuesSubs) throws IOException { - build(mergeStarTrees(starTreeValuesSubs)); + public void build( + List starTreeValuesSubs, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { + build(mergeStarTrees(starTreeValuesSubs), fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); } /** @@ -162,8 +175,9 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal return starTreeDocuments.toArray(starTreeDocumentsArr); } - Iterator sortAndAggregateStarTreeDocuments(StarTreeDocument[] starTreeDocuments) { - return sortAndAggregateStarTreeDocuments(starTreeDocuments, false); + @Override + public StarTreeDocument getStarTreeDocumentForCreatingDocValues(int docId) throws IOException { + return starTreeDocuments.get(docId); } /** diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java index 94c9c9f2efb18..e9d6b4f8dd741 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java @@ -8,6 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; @@ -16,6 +17,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; /** * A star-tree builder that builds a single star-tree. @@ -27,17 +29,29 @@ public interface StarTreeBuilder extends Closeable { /** * Builds the star tree from the original segment documents * - * @param fieldProducerMap contains the docValues producer to get docValues associated with each field + * @param fieldProducerMap contains the docValues producer to get docValues associated with each field + * @param fieldNumberAcrossStarTrees maintains the unique field number across the fields in the star tree + * @param starTreeDocValuesConsumer consumer of star-tree doc values * @throws IOException when we are unable to build star-tree */ - void build(Map fieldProducerMap) throws IOException; + void build( + Map fieldProducerMap, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException; /** - * Builds the star tree using StarTree values from multiple segments + * Builds the star tree using Tree values from multiple segments * * @param starTreeValuesSubs contains the star tree values from multiple segments + * @param fieldNumberAcrossStarTrees maintains the unique field number across the fields in the star tree + * @param starTreeDocValuesConsumer consumer of star-tree doc values * @throws IOException when we are unable to build star-tree */ - void build(List starTreeValuesSubs) throws IOException; + void build( + List starTreeValuesSubs, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java index 6c3d476aa3a55..80cc8623c8acb 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java @@ -10,8 +10,10 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; @@ -25,6 +27,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; /** * Builder to construct star-trees based on multiple star-tree fields. @@ -39,6 +42,7 @@ public class StarTreesBuilder implements Closeable { private final List starTreeFields; private final SegmentWriteState state; private final MapperService mapperService; + private AtomicInteger fieldNumberAcrossStarTrees; public StarTreesBuilder(SegmentWriteState segmentWriteState, MapperService mapperService) { List starTreeFields = new ArrayList<>(); @@ -58,12 +62,24 @@ public StarTreesBuilder(SegmentWriteState segmentWriteState, MapperService mappe this.starTreeFields = starTreeFields; this.state = segmentWriteState; this.mapperService = mapperService; + this.fieldNumberAcrossStarTrees = new AtomicInteger(); } /** - * Builds the star-trees. + * Builds all star-trees for given star-tree fields. + * + * @param metaOut an IndexInput for star-tree metadata + * @param dataOut an IndexInput for star-tree data + * @param fieldProducerMap fetches iterators for the fields (dimensions and metrics) + * @param starTreeDocValuesConsumer a consumer to write star-tree doc values + * @throws IOException when an error occurs while building the star-trees */ - public void build(Map fieldProducerMap) throws IOException { + public void build( + IndexOutput metaOut, + IndexOutput dataOut, + Map fieldProducerMap, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { if (starTreeFields.isEmpty()) { logger.debug("no star-tree fields found, returning from star-tree builder"); return; @@ -75,8 +91,8 @@ public void build(Map fieldProducerMap) throws IOExce // Build all star-trees for (StarTreeField starTreeField : starTreeFields) { - try (StarTreeBuilder starTreeBuilder = getSingleTreeBuilder(starTreeField, state, mapperService)) { - starTreeBuilder.build(fieldProducerMap); + try (StarTreeBuilder starTreeBuilder = getSingleTreeBuilder(metaOut, dataOut, starTreeField, state, mapperService)) { + starTreeBuilder.build(fieldProducerMap, fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); } } logger.debug("Took {} ms to build {} star-trees with star-tree fields", System.currentTimeMillis() - startTime, numStarTrees); @@ -90,9 +106,17 @@ public void close() throws IOException { /** * Merges star tree fields from multiple segments * + * @param metaOut an IndexInput for star-tree metadata + * @param dataOut an IndexInput for star-tree data * @param starTreeValuesSubsPerField starTreeValuesSubs per field + * @param starTreeDocValuesConsumer a consumer to write star-tree doc values */ - public void buildDuringMerge(final Map> starTreeValuesSubsPerField) throws IOException { + public void buildDuringMerge( + IndexOutput metaOut, + IndexOutput dataOut, + final Map> starTreeValuesSubsPerField, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { logger.debug("Starting merge of {} star-trees with star-tree fields", starTreeValuesSubsPerField.size()); long startTime = System.currentTimeMillis(); for (Map.Entry> entry : starTreeValuesSubsPerField.entrySet()) { @@ -102,8 +126,9 @@ public void buildDuringMerge(final Map> starTreeVal continue; } StarTreeField starTreeField = starTreeValuesList.get(0).getStarTreeField(); - StarTreeBuilder builder = getSingleTreeBuilder(starTreeField, state, mapperService); - builder.build(starTreeValuesList); + StarTreeBuilder builder = getSingleTreeBuilder(metaOut, dataOut, starTreeField, state, mapperService); + builder.build(starTreeValuesList, fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); + builder.close(); builder.close(); } logger.debug( @@ -116,11 +141,16 @@ public void buildDuringMerge(final Map> starTreeVal /** * Get star-tree builder based on build mode. */ - StarTreeBuilder getSingleTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) - throws IOException { + StarTreeBuilder getSingleTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, + StarTreeField starTreeField, + SegmentWriteState state, + MapperService mapperService + ) throws IOException { switch (starTreeField.getStarTreeConfig().getBuildMode()) { case ON_HEAP: - return new OnHeapStarTreeBuilder(starTreeField, state, mapperService); + return new OnHeapStarTreeBuilder(metaOut, dataOut, starTreeField, state, mapperService); case OFF_HEAP: // TODO // return new OffHeapStarTreeBuilder(starTreeField, state, mapperService); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java new file mode 100644 index 0000000000000..5184dc57174b2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.meta; + +/** + * Holds the pair of metric name and it's associated stat + * + * @opensearch.experimental + */ +public class MetricEntry { + + private final int metricFieldNumber; + private final int metricStatOrdinal; + + public MetricEntry(int metricFieldNumber, int metricStatOrdinal) { + this.metricFieldNumber = metricFieldNumber; + this.metricStatOrdinal = metricStatOrdinal; + } + + public int getMetricFieldNumber() { + return metricFieldNumber; + } + + public int getMetricStatOrdinal() { + return metricStatOrdinal; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java new file mode 100644 index 0000000000000..2dc306d651727 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java @@ -0,0 +1,230 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.meta; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.IndexInput; +import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.mapper.CompositeMappedFieldType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Holds the associated metadata for the building of star-tree + * + * @opensearch.experimental + */ +public class StarTreeMetadata extends CompositeIndexMetadata implements TreeMetadata { + private static final Logger logger = LogManager.getLogger(TreeMetadata.class); + private final IndexInput meta; + private final String starTreeFieldName; + private final String starTreeFieldType; + private final List dimensionFieldNumbers; + private final List metricEntries; + private final Integer segmentAggregatedDocCount; + private final Integer maxLeafDocs; + private final Set skipStarNodeCreationInDims; + private final StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode; + private final long dataStartFilePointer; + private final long dataLength; + + public StarTreeMetadata(IndexInput meta, String compositeFieldName, CompositeMappedFieldType.CompositeFieldType compositeFieldType) + throws IOException { + super(compositeFieldName, compositeFieldType); + this.meta = meta; + try { + this.starTreeFieldName = this.getCompositeFieldName(); + this.starTreeFieldType = this.getCompositeFieldType().getName(); + this.dimensionFieldNumbers = readStarTreeDimensions(); + this.metricEntries = readMetricEntries(); + this.segmentAggregatedDocCount = readSegmentAggregatedDocCount(); + this.maxLeafDocs = readMaxLeafDocs(); + this.skipStarNodeCreationInDims = readSkipStarNodeCreationInDims(); + this.starTreeBuildMode = readBuildMode(); + this.dataStartFilePointer = readDataStartFilePointer(); + this.dataLength = readDataLength(); + } catch (Exception e) { + logger.error("Unable to read star-tree metadata from the file"); + throw new CorruptIndexException("Unable to read star-tree metadata from the file", meta); + } + } + + @Override + public int readDimensionsCount() throws IOException { + return meta.readVInt(); + } + + @Override + public List readStarTreeDimensions() throws IOException { + int dimensionCount = readDimensionsCount(); + List dimensionFieldNumbers = new ArrayList<>(); + + for (int i = 0; i < dimensionCount; i++) { + dimensionFieldNumbers.add(meta.readVInt()); + } + + return dimensionFieldNumbers; + } + + @Override + public int readMetricsCount() throws IOException { + return meta.readVInt(); + } + + @Override + public List readMetricEntries() throws IOException { + int metricCount = readMetricsCount(); + List metricEntries = new ArrayList<>(); + + for (int i = 0; i < metricCount; i++) { + int metricFieldNumber = meta.readVInt(); + int metricStatOrdinal = meta.readVInt(); + metricEntries.add(new MetricEntry(metricFieldNumber, metricStatOrdinal)); + } + + return metricEntries; + } + + @Override + public int readSegmentAggregatedDocCount() throws IOException { + return meta.readVInt(); + } + + @Override + public int readMaxLeafDocs() throws IOException { + return meta.readVInt(); + } + + @Override + public int readSkipStarNodeCreationInDimsCount() throws IOException { + return meta.readVInt(); + } + + @Override + public Set readSkipStarNodeCreationInDims() throws IOException { + + int skipStarNodeCreationInDimsCount = readSkipStarNodeCreationInDimsCount(); + Set skipStarNodeCreationInDims = new HashSet<>(); + for (int i = 0; i < skipStarNodeCreationInDimsCount; i++) { + skipStarNodeCreationInDims.add(meta.readVInt()); + } + return skipStarNodeCreationInDims; + } + + @Override + public StarTreeFieldConfiguration.StarTreeBuildMode readBuildMode() throws IOException { + return StarTreeFieldConfiguration.StarTreeBuildMode.fromBuildModeOrdinal(meta.readByte()); + } + + @Override + public long readDataStartFilePointer() throws IOException { + return meta.readVLong(); + } + + @Override + public long readDataLength() throws IOException { + return meta.readVLong(); + } + + /** + * Returns the name of the star-tree field. + * + * @return star-tree field name + */ + public String getStarTreeFieldName() { + return starTreeFieldName; + } + + /** + * Returns the type of the star tree field. + * + * @return star-tree field type + */ + public String getStarTreeFieldType() { + return starTreeFieldType; + } + + /** + * Returns the list of dimension field numbers. + * + * @return star-tree dimension field numbers + */ + public List getDimensionFieldNumbers() { + return dimensionFieldNumbers; + } + + /** + * Returns the list of metric entries. + * + * @return star-tree metric entries + */ + public List getMetricEntries() { + return metricEntries; + } + + /** + * Returns the aggregated document count for the star-tree. + * + * @return the aggregated document count for the star-tree. + */ + public Integer getSegmentAggregatedDocCount() { + return segmentAggregatedDocCount; + } + + /** + * Returns the max leaf docs for the star-tree. + * + * @return the max leaf docs. + */ + public Integer getMaxLeafDocs() { + return maxLeafDocs; + } + + /** + * Returns the set of dimensions for which star node will not be created in the star-tree. + * + * @return the set of dimensions. + */ + public Set getSkipStarNodeCreationInDims() { + return skipStarNodeCreationInDims; + } + + /** + * Returns the build mode for the star-tree. + * + * @return the star-tree build mode. + */ + public StarTreeFieldConfiguration.StarTreeBuildMode getStarTreeBuildMode() { + return starTreeBuildMode; + } + + /** + * Returns the file pointer to the start of the star-tree data. + * + * @return start file pointer for star-tree data + */ + public long getDataStartFilePointer() { + return dataStartFilePointer; + } + + /** + * Returns the length of star-tree data + * + * @return star-tree length + */ + public long getDataLength() { + return dataLength; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java new file mode 100644 index 0000000000000..fa2dc8ad22562 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.meta; + +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; + +import java.io.IOException; +import java.util.List; +import java.util.Set; + +/** + * An interface for metadata of the star-tree + * + * @opensearch.experimental + */ +public interface TreeMetadata { + + /** + * Reads the count of dimensions in the star-tree. + * + * @return the count of dimensions + * @throws IOException if an I/O error occurs while reading the dimensions count + */ + int readDimensionsCount() throws IOException; + + /** + * Reads the list of dimension ordinals in the star-tree. + * + * @return the list of dimension ordinals + * @throws IOException if an I/O error occurs while reading the dimension ordinals + */ + List readStarTreeDimensions() throws IOException; + + /** + * Reads the count of metrics in the star-tree. + * + * @return the count of metrics + * @throws IOException if an I/O error occurs while reading the metrics count + */ + int readMetricsCount() throws IOException; + + /** + * Reads the list of metric entries in the star-tree. + * + * @return the list of metric entries + * @throws IOException if an I/O error occurs while reading the metric entries + */ + List readMetricEntries() throws IOException; + + /** + * Reads the aggregated document count for the segment in the star-tree. + * + * @return the aggregated document count for the segment + * @throws IOException if an I/O error occurs while reading the aggregated document count + */ + int readSegmentAggregatedDocCount() throws IOException; + + /** + * Reads the max leaf docs for the star-tree. + * + * @return the max leaf docs for the star-tree + * @throws IOException if an I/O error occurs while reading the max leaf docs + */ + int readMaxLeafDocs() throws IOException; + + /** + * Reads the count of dimensions where star node will not be created in the star-tree. + * + * @return the count of dimensions + * @throws IOException if an I/O error occurs while reading the skip star node dimensions count + */ + int readSkipStarNodeCreationInDimsCount() throws IOException; + + /** + * Reads the list of dimensions field numbers to be skipped for star node creation in the star-tree. + * + * @return the set of dimensions field numbers to be skipped for star node creation. + * @throws IOException if an I/O error occurs while reading the dimensions + */ + Set readSkipStarNodeCreationInDims() throws IOException; + + /** + * Reads the build mode for the star-tree. + * + * @return the star-tree build mode + * @throws IOException if an I/O error occurs while reading the build mode + */ + StarTreeFieldConfiguration.StarTreeBuildMode readBuildMode() throws IOException; + + /** + * Reads the file pointer to the start of the star-tree data. + * + * @return the file pointer to the start of the star-tree data + * @throws IOException if an I/O error occurs while reading the star-tree data start file pointer + */ + long readDataStartFilePointer() throws IOException; + + /** + * Reads the length of the data of the star-tree. + * + * @return the length of the data + * @throws IOException if an I/O error occurs while reading the data length + */ + long readDataLength() throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/package-info.java new file mode 100644 index 0000000000000..568cacea4b59a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Meta package for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.meta; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java new file mode 100644 index 0000000000000..4c22e1a2d19dc --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java @@ -0,0 +1,186 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.lucene.store.RandomAccessInput; + +import java.io.IOException; +import java.util.Iterator; + +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + +/** + * Fixed Length implementation of {@link StarTreeNode} + * + * @opensearch.experimental + */ +public class FixedLengthStarTreeNode implements StarTreeNode { + public static final int NUM_INT_SERIALIZABLE_FIELDS = 6; + public static final int NUM_LONG_SERIALIZABLE_FIELDS = 1; + public static final int NUM_BYTE_SERIALIZABLE_FIELDS = 1; + public static final long SERIALIZABLE_DATA_SIZE_IN_BYTES = (Integer.BYTES * NUM_INT_SERIALIZABLE_FIELDS) + (Long.BYTES + * NUM_LONG_SERIALIZABLE_FIELDS) + (NUM_BYTE_SERIALIZABLE_FIELDS * Byte.BYTES); + private static final int DIMENSION_ID_OFFSET = 0; + private static final int DIMENSION_VALUE_OFFSET = DIMENSION_ID_OFFSET + Integer.BYTES; + private static final int START_DOC_ID_OFFSET = DIMENSION_VALUE_OFFSET + Long.BYTES; + private static final int END_DOC_ID_OFFSET = START_DOC_ID_OFFSET + Integer.BYTES; + private static final int AGGREGATE_DOC_ID_OFFSET = END_DOC_ID_OFFSET + Integer.BYTES; + private static final int STAR_NODE_TYPE_OFFSET = AGGREGATE_DOC_ID_OFFSET + Byte.BYTES; + private static final int FIRST_CHILD_ID_OFFSET = STAR_NODE_TYPE_OFFSET + Integer.BYTES; + private static final int LAST_CHILD_ID_OFFSET = FIRST_CHILD_ID_OFFSET + Integer.BYTES; + + public static final int INVALID_ID = -1; + + private final int nodeId; + private final int firstChildId; + + RandomAccessInput in; + + public FixedLengthStarTreeNode(RandomAccessInput in, int nodeId) throws IOException { + this.in = in; + this.nodeId = nodeId; + firstChildId = getInt(FIRST_CHILD_ID_OFFSET); + } + + private int getInt(int fieldOffset) throws IOException { + return in.readInt(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + private long getLong(int fieldOffset) throws IOException { + return in.readLong(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + private byte getByte(int fieldOffset) throws IOException { + return in.readByte(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + @Override + public int getDimensionId() throws IOException { + return getInt(DIMENSION_ID_OFFSET); + } + + @Override + public long getDimensionValue() throws IOException { + return getLong(DIMENSION_VALUE_OFFSET); + } + + @Override + public int getChildDimensionId() throws IOException { + if (firstChildId == INVALID_ID) { + return INVALID_ID; + } else { + return in.readInt(firstChildId * SERIALIZABLE_DATA_SIZE_IN_BYTES); + } + } + + @Override + public int getStartDocId() throws IOException { + return getInt(START_DOC_ID_OFFSET); + } + + @Override + public int getEndDocId() throws IOException { + return getInt(END_DOC_ID_OFFSET); + } + + @Override + public int getAggregatedDocId() throws IOException { + return getInt(AGGREGATE_DOC_ID_OFFSET); + } + + @Override + public int getNumChildren() throws IOException { + if (firstChildId == INVALID_ID) { + return 0; + } else { + return getInt(LAST_CHILD_ID_OFFSET) - firstChildId + 1; + } + } + + @Override + public boolean isLeaf() { + return firstChildId == INVALID_ID; + } + + @Override + public byte getStarTreeNodeType() throws IOException { + return getByte(STAR_NODE_TYPE_OFFSET); + } + + @Override + public StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isStar) throws IOException { + // there will be no children for leaf nodes + if (isLeaf()) { + return null; + } + + // Specialize star node for performance + if (isStar) { + return handleStarNode(); + } + + return binarySearchChild(dimensionValue); + } + + private FixedLengthStarTreeNode handleStarNode() throws IOException { + FixedLengthStarTreeNode firstNode = new FixedLengthStarTreeNode(in, firstChildId); + if (firstNode.getDimensionValue() == ALL) { + return firstNode; + } else { + return null; + } + } + + private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException { + // Binary search to find child node + int low = firstChildId; + int high = getInt(LAST_CHILD_ID_OFFSET); + + while (low <= high) { + int mid = low + (high - low) / 2; + FixedLengthStarTreeNode midNode = new FixedLengthStarTreeNode(in, mid); + long midNodeDimensionValue = midNode.getDimensionValue(); + + if (midNodeDimensionValue == dimensionValue) { + return midNode; + } else if (midNodeDimensionValue < dimensionValue) { + low = mid + 1; + } else { + high = mid - 1; + } + } + return null; + } + + @Override + public Iterator getChildrenIterator() throws IOException { + return new Iterator<>() { + private int currentChildId = firstChildId; + private final int lastChildId = getInt(LAST_CHILD_ID_OFFSET); + + @Override + public boolean hasNext() { + return currentChildId <= lastChildId; + } + + @Override + public FixedLengthStarTreeNode next() { + try { + return new FixedLengthStarTreeNode(in, currentChildId++); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java new file mode 100644 index 0000000000000..21041841cf789 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; + +import java.io.IOException; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; + +/** + * Off heap implementation of the star-tree. + * + * @opensearch.experimental + */ +public class StarTree implements Tree { + private static final Logger logger = LogManager.getLogger(StarTree.class); + private final FixedLengthStarTreeNode root; + private final Integer numNodes; + + public StarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOException { + long magicMarker = data.readLong(); + if (MAGIC_MARKER != magicMarker) { + logger.error("Invalid magic marker"); + throw new IOException("Invalid magic marker"); + } + int version = data.readVInt(); + if (VERSION != version) { + logger.error("Invalid star tree version"); + throw new IOException("Invalid version"); + } + numNodes = data.readVInt(); // num nodes + + RandomAccessInput in = data.randomAccessSlice(starTreeMetadata.getDataStartFilePointer(), starTreeMetadata.getDataLength()); + root = new FixedLengthStarTreeNode(in, 0); + } + + @Override + public StarTreeNode getRoot() { + return root; + } + + /** + * Returns the number of nodes in star-tree + * + * @return number of nodes in te star-tree + */ + public Integer getNumNodes() { + return numNodes; + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java index 59522ffa4be89..3c2acadff6a96 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java @@ -20,7 +20,6 @@ */ @ExperimentalApi public interface StarTreeNode { - long ALL = -1l; /** * Returns the dimension ID of the current star-tree node. @@ -86,12 +85,12 @@ public interface StarTreeNode { boolean isLeaf(); /** - * Checks if the current node is a star node. + * Checks if the current node is a star node, null node or a node with actual dimension value. * - * @return true if the node is a star node, false otherwise - * @throws IOException if an I/O error occurs while reading the star node status + * @return the node type value based on the star-tree node type + * @throws IOException if an I/O error occurs while reading the node type */ - boolean isStarNode() throws IOException; + byte getStarTreeNodeType() throws IOException; /** * Returns the child star-tree node for the given dimension value. @@ -100,7 +99,7 @@ public interface StarTreeNode { * @return the child node for the given dimension value or null if child is not present * @throws IOException if an I/O error occurs while retrieving the child node */ - StarTreeNode getChildForDimensionValue(long dimensionValue) throws IOException; + StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isStar) throws IOException; /** * Returns an iterator over the children of the current star-tree node. diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java new file mode 100644 index 0000000000000..856fee072cd9e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java @@ -0,0 +1,90 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.node; + +/** + * Represents the different types of nodes in a StarTree data structure. + *

+ * In order to handle different node types, we use a byte value to represent the node type. + * This enum provides a convenient way to map byte values to their corresponding node types. + *

+ * Star and Null Nodes are represented as special cases. Default is the general case. + * Star and Null nodes are represented with negative ordinals so that first node is Star, second node is Null Node + * and the rest of the default nodes are sorted based on dimension values. + *

+ * By default, we want to consider nodes as default node. + * + * @opensearch.experimental + * @see StarTreeNode + */ +public enum StarTreeNodeType { + + /** + * Represents a star node type. + * + */ + STAR("star", (byte) -2), + + /** + * Represents a null node type. + */ + NULL("null", (byte) -1), + + /** + * Represents a default node type. + */ + DEFAULT("default", (byte) 0); + + private final String name; + private final byte value; + + /** + * Constructs a new StarTreeNodeType with the given name and value. + * + * @param name the name of the node type + * @param value the value associated with the node type + */ + StarTreeNodeType(String name, byte value) { + this.name = name; + this.value = value; + } + + /** + * Returns the name of the node type. + * + * @return the name of the node type + */ + public String getName() { + return name; + } + + /** + * Returns the value associated with the node type. + * + * @return the value associated with the node type + */ + public byte getValue() { + return value; + } + + /** + * Returns the StarTreeNodeType enum constant with the specified value. + * + * @param value the value of the enum constant to return + * @return the enum constant with the specified value, or null if no such constant exists + */ + public static StarTreeNodeType fromValue(byte value) { + for (StarTreeNodeType nodeType : StarTreeNodeType.values()) { + if (nodeType.getValue() == value) { + return nodeType; + } + } + return null; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java new file mode 100644 index 0000000000000..811150da64bec --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +/** + * Interface for star-tree. + * + * @opensearch.experimental + */ +public interface Tree { + + /** + * Fetches the root node of the star-tree. + * @return the root of the star-tree + */ + StarTreeNode getRoot(); + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java index 516d5b5a012ab..19d12bc6318d7 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java @@ -8,5 +8,7 @@ /** * Holds classes associated with star tree node + * + * @opensearch.experimental */ package org.opensearch.index.compositeindex.datacube.startree.node; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java index 400d7a1c00104..c76f1e0bbf8e6 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java @@ -9,6 +9,7 @@ package org.opensearch.index.compositeindex.datacube.startree.utils; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.opensearch.common.annotation.ExperimentalApi; @@ -28,6 +29,11 @@ public class SequentialDocValuesIterator { */ private final DocIdSetIterator docIdSetIterator; + /** + * The value associated with the latest document. + */ + private Long docValue; + /** * The id of the latest document. */ @@ -42,15 +48,50 @@ public SequentialDocValuesIterator(DocIdSetIterator docIdSetIterator) { this.docIdSetIterator = docIdSetIterator; } + /** + * Constructs a new SequentialDocValuesIterator instance with the given SortedNumericDocValues. + * + */ + public SequentialDocValuesIterator() { + this.docIdSetIterator = DocValues.emptySortedNumeric(); + } + + /** + * Returns the value associated with the latest document. + * + * @return the value associated with the latest document + */ + public Long getDocValue() { + return docValue; + } + + /** + * Sets the value associated with the latest document. + * + * @param docValue the value to be associated with the latest document + */ + public void setDocValue(Long docValue) { + this.docValue = docValue; + } + /** * Returns the id of the latest document. * * @return the id of the latest document */ - int getDocId() { + public int getDocId() { return docId; } + /** + * Sets the id of the latest document. + * + * @param docId the ID of the latest document + */ + public void setDocId(int docId) { + this.docId = docId; + } + /** * Returns the DocIdSetIterator associated with this instance. * @@ -65,7 +106,7 @@ public int nextDoc(int currentDocId) throws IOException { if (docId >= currentDocId) { return docId; } - docId = this.docIdSetIterator.nextDoc(); + setDocId(this.docIdSetIterator.nextDoc()); return docId; } @@ -81,7 +122,12 @@ public Long value(int currentDocId) throws IOException { if (docId == DocIdSetIterator.NO_MORE_DOCS || docId != currentDocId) { return null; } - return sortedNumericDocValues.nextValue(); + if (docValue == null) { + docValue = sortedNumericDocValues.nextValue(); + } + Long nextValue = docValue; + docValue = null; + return nextValue; } else { throw new IllegalStateException("Unsupported Iterator requested for SequentialDocValuesIterator"); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java new file mode 100644 index 0000000000000..89148f3c20ab5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java @@ -0,0 +1,141 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexOutput; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; +import static org.opensearch.index.compositeindex.datacube.startree.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + +/** + * Utility class for serializing a star-tree data structure. + * + * @opensearch.experimental + */ +public class StarTreeDataWriter { + + private static final Logger logger = LogManager.getLogger(StarTreeDataWriter.class); + + /** + * Writes the star-tree data structure. + * + * @param indexOutput the IndexOutput to write the star-tree data + * @param rootNode the root node of the star-tree + * @param numNodes the total number of nodes in the star-tree + * @param name the name of the star-tree field + * @return the total size in bytes of the serialized star-tree data + * @throws IOException if an I/O error occurs while writing the star-tree data + */ + public static long writeStarTree(IndexOutput indexOutput, TreeNode rootNode, int numNodes, String name) throws IOException { + long totalSizeInBytes = 0L; + totalSizeInBytes += computeStarTreeDataHeaderByteSize(); + totalSizeInBytes += (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES; + + if (logger.isDebugEnabled()) { + logger.debug("Star tree data size in bytes : {} for star-tree field {}", totalSizeInBytes, name); + } + + writeStarTreeHeader(indexOutput, numNodes); + writeStarTreeNodes(indexOutput, rootNode); + return totalSizeInBytes; + } + + /** + * Computes the byte size of the star-tree data header. + * + * @return the byte size of the star-tree data header + */ + private static int computeStarTreeDataHeaderByteSize() { + // Magic marker (8), version (4) + int headerSizeInBytes = 12; + + // For number of nodes. + headerSizeInBytes += Integer.BYTES; + return headerSizeInBytes; + } + + /** + * Writes the star-tree data header. + * + * @param output the IndexOutput to write the header + * @param numNodes the total number of nodes in the star-tree + * @throws IOException if an I/O error occurs while writing the header + */ + private static void writeStarTreeHeader(IndexOutput output, int numNodes) throws IOException { + output.writeLong(MAGIC_MARKER); + output.writeInt(VERSION); + output.writeInt(numNodes); + } + + /** + * Writes the star-tree nodes in a breadth-first order. + * + * @param output the IndexOutput to write the nodes + * @param rootNode the root node of the star-tree + * @throws IOException if an I/O error occurs while writing the nodes + */ + private static void writeStarTreeNodes(IndexOutput output, TreeNode rootNode) throws IOException { + Queue queue = new LinkedList<>(); + queue.add(rootNode); + + int currentNodeId = 0; + while (!queue.isEmpty()) { + TreeNode node = queue.remove(); + + if (node.children == null || node.children.isEmpty()) { + writeStarTreeNode(output, node, ALL, ALL); + } else { + + // Sort all children nodes based on dimension value + List sortedChildren = new ArrayList<>(node.children.values()); + sortedChildren.sort(Comparator.comparingInt(TreeNode::getNodeType).thenComparingLong(TreeNode::getDimensionValue)); + + int firstChildId = currentNodeId + queue.size() + 1; + int lastChildId = firstChildId + sortedChildren.size() - 1; + writeStarTreeNode(output, node, firstChildId, lastChildId); + + queue.addAll(sortedChildren); + } + + currentNodeId++; + } + } + + /** + * Writes a single star-tree node + * + * @param output the IndexOutput to write the node + * @param node the star tree node to write + * @param firstChildId the ID of the first child node + * @param lastChildId the ID of the last child node + * @throws IOException if an I/O error occurs while writing the node + */ + private static void writeStarTreeNode(IndexOutput output, TreeNode node, int firstChildId, int lastChildId) throws IOException { + output.writeInt(node.dimensionId); + output.writeLong(node.dimensionValue); + output.writeInt(node.startDocId); + output.writeInt(node.endDocId); + output.writeInt(node.aggregatedDocId); + output.writeByte(node.nodeType); + output.writeInt(firstChildId); + output.writeInt(lastChildId); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java new file mode 100644 index 0000000000000..a21939798b5e4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java @@ -0,0 +1,167 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.mapper.CompositeMappedFieldType; + +import java.io.IOException; +import java.util.List; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; + +/** + * The utility class for serializing the metadata of a star-tree data structure. + * The metadata includes information about the dimensions, metrics, and other relevant details + * related to the star tree. + * + * @opensearch.experimental + */ +public class StarTreeMetaWriter { + + private static final Logger logger = LogManager.getLogger(StarTreeMetaWriter.class); + + /** + * Writes the star-tree metadata. + * + * @param metaOut the IndexOutput to write the metadata + * @param starTreeField the star-tree field + * @param writeState the segment write state + * @param metricAggregatorInfos the list of metric aggregator information + * @param segmentAggregatedCount the aggregated document count for the segment + * @param dataFilePointer the file pointer to the start of the star tree data + * @param dataFileLength the length of the star tree data file + * @throws IOException if an I/O error occurs while serializing the metadata + */ + public static void writeStarTreeMetadata( + IndexOutput metaOut, + StarTreeField starTreeField, + SegmentWriteState writeState, + List metricAggregatorInfos, + Integer segmentAggregatedCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + + long initialMetaFilePointer = metaOut.getFilePointer(); + + writeMetaHeader(metaOut, CompositeMappedFieldType.CompositeFieldType.STAR_TREE, starTreeField.getName()); + writeMeta(metaOut, writeState, metricAggregatorInfos, starTreeField, segmentAggregatedCount, dataFilePointer, dataFileLength); + + if (logger.isDebugEnabled()) { + logger.debug( + "Star tree meta size in bytes : {} for star-tree field {}", + metaOut.getFilePointer() - initialMetaFilePointer, + starTreeField.getName() + ); + } + } + + /** + * Writes the star-tree metadata header. + * + * @param metaOut the IndexOutput to write the header + * @param compositeFieldType the composite field type of the star-tree field + * @param starTreeFieldName the name of the star-tree field + * @throws IOException if an I/O error occurs while writing the header + */ + private static void writeMetaHeader( + IndexOutput metaOut, + CompositeMappedFieldType.CompositeFieldType compositeFieldType, + String starTreeFieldName + ) throws IOException { + // magic marker for sanity + metaOut.writeVLong(MAGIC_MARKER); + + // version + metaOut.writeVInt(VERSION); + + // star tree field name + metaOut.writeString(starTreeFieldName); + + // star tree field type + metaOut.writeString(compositeFieldType.getName()); + } + + /** + * Writes the star-tree metadata. + * + * @param metaOut the IndexOutput to write the metadata + * @param writeState the segment write state + * @param metricAggregatorInfos the list of metric aggregator information + * @param starTreeField the star tree field + * @param segmentAggregatedDocCount the aggregated document count for the segment + * @param dataFilePointer the file pointer to the start of the star-tree data + * @param dataFileLength the length of the star-tree data file + * @throws IOException if an I/O error occurs while writing the metadata + */ + private static void writeMeta( + IndexOutput metaOut, + SegmentWriteState writeState, + List metricAggregatorInfos, + StarTreeField starTreeField, + Integer segmentAggregatedDocCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + + // number of dimensions + metaOut.writeVInt(starTreeField.getDimensionsOrder().size()); + + // dimensions + for (Dimension dimension : starTreeField.getDimensionsOrder()) { + int dimensionFieldNumber = writeState.fieldInfos.fieldInfo(dimension.getField()).getFieldNumber(); + metaOut.writeVInt(dimensionFieldNumber); + } + + // number of metrics + metaOut.writeVInt(metricAggregatorInfos.size()); + + // metric - metric stat pair + for (MetricAggregatorInfo metricAggregatorInfo : metricAggregatorInfos) { + String metricName = metricAggregatorInfo.getField(); + int metricFieldNumber = writeState.fieldInfos.fieldInfo(metricName).getFieldNumber(); + int metricStatOrdinal = metricAggregatorInfo.getMetricStat().getMetricOrdinal(); + metaOut.writeVInt(metricFieldNumber); + metaOut.writeVInt(metricStatOrdinal); + } + + // segment aggregated document count + metaOut.writeVInt(segmentAggregatedDocCount); + + // max leaf docs + metaOut.writeVInt(starTreeField.getStarTreeConfig().maxLeafDocs()); + + // number of skip star node creation dimensions + metaOut.writeVInt(starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims().size()); + + // skip star node creations + for (String dimension : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) { + int dimensionFieldNumber = writeState.fieldInfos.fieldInfo(dimension).getFieldNumber(); + metaOut.writeVInt(dimensionFieldNumber); + } + + // star tree build-mode + metaOut.writeByte(starTreeField.getStarTreeConfig().getBuildMode().getBuildModeOrdinal()); + + // star-tree data file pointer + metaOut.writeVLong(dataFilePointer); + + // star-tree data file length + metaOut.writeVLong(dataFileLength); + + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java new file mode 100644 index 0000000000000..85b63b403fcbb --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; + +/** + * Util class for building star tree + * + * @opensearch.experimental + */ +public class StarTreeUtils { + + private StarTreeUtils() {} + + public static final int ALL = -1; + + /** + * Write star tree to index output stream + * + * @param dataOut data index output + * @param rootNode root star-tree node + * @param numNodes number of nodes in the tree + * @param name name of the star-tree field + * @return total size of the three + * @throws IOException when star-tree data serialization fails + */ + public static long writeStarTree(IndexOutput dataOut, TreeNode rootNode, int numNodes, String name) throws IOException { + return StarTreeDataWriter.writeStarTree(dataOut, rootNode, numNodes, name); + } + + /** + * Write star tree metadata to index output stream + * + * @param metaOut meta index output + * @param starTreeField star tree field + * @param writeState segment write state + * @param metricAggregatorInfos metric aggregator infos + * @param segmentAggregatedCount segment aggregated count + * @param dataFilePointer data file pointer + * @param dataFileLength data file length + * @throws IOException when star-tree data serialization fails + */ + public static void writeStarTreeMetadata( + IndexOutput metaOut, + StarTreeField starTreeField, + SegmentWriteState writeState, + List metricAggregatorInfos, + Integer segmentAggregatedCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + StarTreeMetaWriter.writeStarTreeMetadata( + metaOut, + starTreeField, + writeState, + metricAggregatorInfos, + segmentAggregatedCount, + dataFilePointer, + dataFileLength + ); + } + + /** + * The suffix appended to dimension field names in the Star Tree index. + */ + public static final String DIMENSION_SUFFIX = "dim"; + + /** + * The suffix appended to metric field names in the Star Tree index. + */ + public static final String METRIC_SUFFIX = "metric"; + + /** + * Returns the full field name for a dimension in the star-tree index. + * + * @param starTreeFieldName star-tree field name + * @param dimensionName name of the dimension + * @return full field name for the dimension in the star-tree index + */ + public static String fullyQualifiedFieldNameForStarTreeDimensionsDocValues(String starTreeFieldName, String dimensionName) { + return starTreeFieldName + "_" + dimensionName + "_" + DIMENSION_SUFFIX; + } + + /** + * Returns the full field name for a metric in the star-tree index. + * + * @param starTreeFieldName star-tree field name + * @param fieldName name of the metric field + * @param metricName name of the metric + * @return full field name for the metric in the star-tree index + */ + public static String fullyQualifiedFieldNameForStarTreeMetricsDocValues(String starTreeFieldName, String fieldName, String metricName) { + return MetricAggregatorInfo.toFieldName(starTreeFieldName, fieldName, metricName) + "_" + METRIC_SUFFIX; + } + + /** + * Get field infos from field names + * + * @param fields field names + * @return field infos + */ + public static FieldInfo[] getFieldInfoList(List fields) { + FieldInfo[] fieldInfoList = new FieldInfo[fields.size()]; + + // field number is not really used. We depend on unique field names to get the desired iterator + int fieldNumber = 0; + + for (String fieldName : fields) { + fieldInfoList[fieldNumber] = getFieldInfo(fieldName, fieldNumber); + fieldNumber++; + } + return fieldInfoList; + } + + /** + * Get new field info instance for a given field name and field number + * @param fieldName name of the field + * @param fieldNumber number of the field + * @return new field info instance + */ + public static FieldInfo getFieldInfo(String fieldName, int fieldNumber) { + return new FieldInfo( + fieldName, + fieldNumber, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java index 5cf737c61ab2d..42f0b921a7f07 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java @@ -11,6 +11,8 @@ import java.util.Map; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + /** * /** * Represents a node in a tree data structure, specifically designed for a star-tree implementation. @@ -21,8 +23,6 @@ @ExperimentalApi public class TreeNode { - public static final int ALL = -1; - /** * The dimension id for the dimension (field) associated with this star-tree node. */ @@ -54,12 +54,20 @@ public class TreeNode { public long dimensionValue = ALL; /** - * A flag indicating whether this node is a star node (a node that represents an aggregation of all dimensions). + * A byte indicating whether the node is star node, null node or default node (with dimension value present). */ - public boolean isStarNode = false; + public byte nodeType = 0; /** * A map containing the child nodes of this star-tree node, keyed by their dimension id. */ public Map children; + + public long getDimensionValue() { + return dimensionValue; + } + + public byte getNodeType() { + return nodeType; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/package-info.java index 59f18efec26b1..9a88f88d9850a 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/package-info.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/package-info.java @@ -8,6 +8,7 @@ /** * Core classes for handling composite indices. - * @opensearch.experimental + * + * @opensearch.experimental */ package org.opensearch.index.compositeindex; diff --git a/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java b/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java index 049d91bc42d9c..e7ec9f7fd107d 100644 --- a/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java +++ b/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java @@ -23,6 +23,7 @@ import org.opensearch.cluster.ClusterModule; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.CheckedConsumer; +import org.opensearch.common.Rounding; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; @@ -30,16 +31,27 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.MapperTestUtils; import org.opensearch.index.codec.composite.Composite99Codec; +import org.opensearch.index.compositeindex.datacube.DateDimension; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.StarTreeMapper; import org.opensearch.indices.IndicesModule; import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX; +import static org.opensearch.test.OpenSearchTestCase.randomFrom; /** * Star tree doc values Lucene tests @@ -76,6 +88,32 @@ protected Codec getCodec() { return codec; } + private StarTreeMapper.StarTreeFieldType getStarTreeFieldType() { + List m1 = new ArrayList<>(); + m1.add(MetricStat.MAX); + Metric metric = new Metric("sndv", m1); + List d1CalendarIntervals = new ArrayList<>(); + d1CalendarIntervals.add(Rounding.DateTimeUnit.HOUR_OF_DAY); + StarTreeField starTreeField = getStarTreeField(d1CalendarIntervals, metric); + + return new StarTreeMapper.StarTreeFieldType("star_tree", starTreeField); + } + + private static StarTreeField getStarTreeField(List d1CalendarIntervals, Metric metric1) { + DateDimension d1 = new DateDimension("field", d1CalendarIntervals); + NumericDimension d2 = new NumericDimension("dv"); + + List metrics = List.of(metric1); + List dims = List.of(d1, d2); + StarTreeFieldConfiguration config = new StarTreeFieldConfiguration( + 100, + Collections.emptySet(), + randomFrom(StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP) // TODO : change it + ); + + return new StarTreeField("starTree", dims, metrics, config); + } + public void testStarTreeDocValues() throws IOException { Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(null); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java index 51ebc02ea8243..42f195f41b1af 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java @@ -8,20 +8,24 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.Version; import org.opensearch.common.settings.Settings; +import org.opensearch.index.codec.composite.Composite99DocValuesFormat; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; @@ -51,6 +55,7 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -75,9 +80,12 @@ public class BaseStarTreeBuilderTests extends OpenSearchTestCase { private static List metrics; private static Directory directory; private static FieldInfo[] fieldsInfo; - private static SegmentWriteState state; + private static SegmentWriteState writeState; private static StarTreeField starTreeField; + private static IndexOutput dataOut; + private static IndexOutput metaOut; + @BeforeClass public static void setup() throws IOException { @@ -138,7 +146,21 @@ public static void setup() throws IOException { fieldProducerMap.put(fields.get(i), docValuesProducer); } FieldInfos fieldInfos = new FieldInfos(fieldsInfo); - state = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + + String dataFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + dataOut = writeState.directory.createOutput(dataFileName, writeState.context); + + String metaFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + metaOut = writeState.directory.createOutput(metaFileName, writeState.context); mapperService = mock(MapperService.class); DocumentMapper documentMapper = mock(DocumentMapper.class); @@ -157,9 +179,13 @@ public static void setup() throws IOException { ); when(documentMapper.mappers()).thenReturn(fieldMappers); - builder = new BaseStarTreeBuilder(starTreeField, state, mapperService) { + builder = new BaseStarTreeBuilder(metaOut, dataOut, starTreeField, writeState, mapperService) { @Override - public void build(List starTreeValuesSubs) throws IOException {} + public void build( + List starTreeValuesSubs, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException {} @Override public void appendStarTreeDocument(StarTreeDocument starTreeDocument) throws IOException {} @@ -169,6 +195,11 @@ public StarTreeDocument getStarTreeDocument(int docId) throws IOException { return null; } + @Override + public StarTreeDocument getStarTreeDocumentForCreatingDocValues(int docId) throws IOException { + return null; + } + @Override public List getStarTreeDocuments() { return List.of(); @@ -224,6 +255,8 @@ public void test_reduceStarTreeDocuments() { @Override public void tearDown() throws Exception { super.tearDown(); + dataOut.close(); + metaOut.close(); directory.close(); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java index aed08b7727be7..5667ca428f648 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java @@ -8,17 +8,1080 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.sandbox.document.HalfFloatPoint; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.Version; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.codec.composite.Composite99DocValuesFormat; +import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; +import org.opensearch.index.mapper.ContentPath; +import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.MappingLookup; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; -public class OnHeapStarTreeBuilderTests extends AbstractStarTreeBuilderTests { - @Override - public BaseStarTreeBuilder getStarTreeBuilder( - StarTreeField starTreeField, - SegmentWriteState segmentWriteState, - MapperService mapperService +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class OnHeapStarTreeBuilderTests extends OpenSearchTestCase { + + private OnHeapStarTreeBuilder builder; + private MapperService mapperService; + private List dimensionsOrder; + private List fields = List.of(); + private List metrics; + private Directory directory; + private FieldInfo[] fieldsInfo; + private StarTreeField compositeField; + private Map fieldProducerMap; + private SegmentWriteState writeState; + private IndexOutput dataOut; + private IndexOutput metaOut; + private DocValuesConsumer docValuesConsumer; + + @Before + public void setup() throws IOException { + fields = List.of("field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8", "field9", "field10"); + + dimensionsOrder = List.of( + new NumericDimension("field1"), + new NumericDimension("field3"), + new NumericDimension("field5"), + new NumericDimension("field8") + ); + metrics = List.of( + new Metric("field2", List.of(MetricStat.SUM)), + new Metric("field4", List.of(MetricStat.SUM)), + new Metric("field6", List.of(MetricStat.COUNT)), + new Metric("field9", List.of(MetricStat.MIN)), + new Metric("field10", List.of(MetricStat.MAX)) + ); + + DocValuesProducer docValuesProducer = mock(DocValuesProducer.class); + docValuesConsumer = mock(DocValuesConsumer.class); + + compositeField = new StarTreeField( + "test", + dimensionsOrder, + metrics, + new StarTreeFieldConfiguration(1, Set.of("field8"), StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP) + ); + directory = newFSDirectory(createTempDir()); + SegmentInfo segmentInfo = new SegmentInfo( + directory, + Version.LATEST, + Version.LUCENE_9_11_0, + "test_segment", + 6, + false, + false, + new Lucene99Codec(), + new HashMap<>(), + UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), + new HashMap<>(), + null + ); + + fieldsInfo = new FieldInfo[fields.size()]; + fieldProducerMap = new HashMap<>(); + for (int i = 0; i < fieldsInfo.length; i++) { + fieldsInfo[i] = new FieldInfo( + fields.get(i), + i, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + fieldProducerMap.put(fields.get(i), docValuesProducer); + } + FieldInfos fieldInfos = new FieldInfos(fieldsInfo); + writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + + String dataFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + dataOut = writeState.directory.createOutput(dataFileName, writeState.context); + + String metaFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + metaOut = writeState.directory.createOutput(metaFileName, writeState.context); + + mapperService = mock(MapperService.class); + DocumentMapper documentMapper = mock(DocumentMapper.class); + when(mapperService.documentMapper()).thenReturn(documentMapper); + Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); + NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("field2", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper2 = new NumberFieldMapper.Builder("field4", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper3 = new NumberFieldMapper.Builder("field6", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper4 = new NumberFieldMapper.Builder("field9", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper5 = new NumberFieldMapper.Builder("field10", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + MappingLookup fieldMappers = new MappingLookup( + Set.of(numberFieldMapper1, numberFieldMapper2, numberFieldMapper3, numberFieldMapper4, numberFieldMapper5), + Collections.emptyList(), + Collections.emptyList(), + 0, + null + ); + when(documentMapper.mappers()).thenReturn(fieldMappers); + builder = new OnHeapStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + } + + public void test_sortAndAggregateStarTreeDocuments() throws IOException { + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 12.0, 10.0, randomDouble(), 8.0, 20.0 }); + starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 12.0, 10.0 }); + starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 14.0, 12.0, randomDouble(), 6.0, 24.0 }); + starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 }); + starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, 16.0, randomDouble(), 8.0, 13.0 }); + + List inorderStarTreeDocuments = List.of( + new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 }), + new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0 }) + ); + Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); + + StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + for (int i = 0; i < noOfStarTreeDocuments; i++) { + Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; + for (int j = 0; j < metrics.length; j++) { + metrics[j] = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[j]); + } + segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); + } + + Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( + segmentStarTreeDocuments, + false + ); + int numOfAggregatedDocuments = 0; + while (segmentStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { + StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); + StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); + for (int dim = 0; dim < 4; dim++) { + assertEquals(expectedStarTreeDocument.dimensions[dim], resultStarTreeDocument.dimensions[dim]); + } + + for (int met = 0; met < 5; met++) { + assertEquals(expectedStarTreeDocument.metrics[met], resultStarTreeDocument.metrics[met]); + } + numOfAggregatedDocuments++; + } + assertEquals(inorderStarTreeDocuments.size(), numOfAggregatedDocuments); + + } + + public void test_sortAndAggregateStarTreeDocuments_idempotentMetric() throws IOException { + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 12.0, 10.0, randomDouble(), 8.0, 20.0 }); + starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 8.0, 24.0 }); + starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 14.0, 12.0, randomDouble(), 8.0, 20.0 }); + starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 }); + starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, null, randomDouble(), 8.0, 20.0 }); + + StarTreeDocument expectedStarTreeDocument = new StarTreeDocument( + new Long[] { 2L, 4L, 3L, 4L }, + new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 } + ); + StarTreeDocument expectedStarTreeDocumentWithIdempotentMetric = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, 1L }, + new Object[] { 35.0, 18.0, 3L, 8.0, 24.0 } + ); + + StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + for (int i = 0; i < noOfStarTreeDocuments; i++) { + Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; + for (int j = 0; j < metrics.length; j++) { + metrics[j] = starTreeDocuments[i].metrics[j] != null + ? NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[j]) + : null; + } + segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); + } + + Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( + segmentStarTreeDocuments, + false + ); + + StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); + for (int dim = 0; dim < 4; dim++) { + assertEquals(expectedStarTreeDocument.dimensions[dim], resultStarTreeDocument.dimensions[dim]); + } + + for (int met = 0; met < 5; met++) { + assertEquals(expectedStarTreeDocument.metrics[met], resultStarTreeDocument.metrics[met]); + } + + resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); + for (int dim = 0; dim < 4; dim++) { + assertEquals(expectedStarTreeDocumentWithIdempotentMetric.dimensions[dim], resultStarTreeDocument.dimensions[dim]); + } + + for (int met = 0; met < 5; met++) { + assertEquals(expectedStarTreeDocumentWithIdempotentMetric.metrics[met], resultStarTreeDocument.metrics[met]); + } + + } + + public void test_sortAndAggregateStarTreeDocument_longMaxAndLongMinDimensions() throws IOException { + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument( + new Long[] { Long.MIN_VALUE, 4L, 3L, 4L }, + new Double[] { 12.0, 10.0, randomDouble(), 8.0, 20.0 } + ); + starTreeDocuments[1] = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, Long.MAX_VALUE }, + new Double[] { 10.0, 6.0, randomDouble(), 12.0, 10.0 } + ); + starTreeDocuments[2] = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, Long.MAX_VALUE }, + new Double[] { 14.0, 12.0, randomDouble(), 6.0, 24.0 } + ); + starTreeDocuments[3] = new StarTreeDocument( + new Long[] { Long.MIN_VALUE, 4L, 3L, 4L }, + new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 } + ); + starTreeDocuments[4] = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, Long.MAX_VALUE }, + new Double[] { 11.0, 16.0, randomDouble(), 8.0, 13.0 } + ); + + List inorderStarTreeDocuments = List.of( + new StarTreeDocument(new Long[] { Long.MIN_VALUE, 4L, 3L, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 }), + new StarTreeDocument(new Long[] { 3L, 4L, 2L, Long.MAX_VALUE }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0 }) + ); + Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); + + StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + for (int i = 0; i < noOfStarTreeDocuments; i++) { + Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; + for (int j = 0; j < metrics.length; j++) { + metrics[j] = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[j]); + } + segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); + } + + Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( + segmentStarTreeDocuments, + false + ); + int numOfAggregatedDocuments = 0; + while (segmentStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { + StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); + StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); + + for (int dim = 0; dim < 4; dim++) { + assertEquals(expectedStarTreeDocument.dimensions[dim], resultStarTreeDocument.dimensions[dim]); + } + + for (int met = 0; met < 5; met++) { + assertEquals(expectedStarTreeDocument.metrics[met], resultStarTreeDocument.metrics[met]); + } + + numOfAggregatedDocuments++; + } + + assertEquals(inorderStarTreeDocuments.size(), numOfAggregatedDocuments); + + } + + public void test_sortAndAggregateStarTreeDocument_DoubleMaxAndDoubleMinMetrics() throws IOException { + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument( + new Long[] { 2L, 4L, 3L, 4L }, + new Double[] { Double.MAX_VALUE, 10.0, randomDouble(), 8.0, 20.0 } + ); + starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 12.0, 10.0 }); + starTreeDocuments[2] = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, 1L }, + new Double[] { 14.0, Double.MIN_VALUE, randomDouble(), 6.0, 24.0 } + ); + starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 }); + starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, 16.0, randomDouble(), 8.0, 13.0 }); + + List inorderStarTreeDocuments = List.of( + new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Object[] { Double.MAX_VALUE + 9, 14.0, 2L, 8.0, 20.0 }), + new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Object[] { 35.0, Double.MIN_VALUE + 22, 3L, 6.0, 24.0 }) + ); + Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); + + StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + for (int i = 0; i < noOfStarTreeDocuments; i++) { + Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; + for (int j = 0; j < metrics.length; j++) { + metrics[j] = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[j]); + } + segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); + } + + Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( + segmentStarTreeDocuments, + false + ); + int numOfAggregatedDocuments = 0; + while (segmentStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { + StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); + StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); + + for (int dim = 0; dim < 4; dim++) { + assertEquals(expectedStarTreeDocument.dimensions[dim], resultStarTreeDocument.dimensions[dim]); + } + + for (int met = 0; met < 5; met++) { + assertEquals(expectedStarTreeDocument.metrics[met], resultStarTreeDocument.metrics[met]); + } + + numOfAggregatedDocuments++; + } + + assertEquals(inorderStarTreeDocuments.size(), numOfAggregatedDocuments); + + } + + public void test_build_halfFloatMetrics() throws IOException { + + mapperService = mock(MapperService.class); + DocumentMapper documentMapper = mock(DocumentMapper.class); + when(mapperService.documentMapper()).thenReturn(documentMapper); + Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); + NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("field2", NumberFieldMapper.NumberType.HALF_FLOAT, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper2 = new NumberFieldMapper.Builder("field4", NumberFieldMapper.NumberType.HALF_FLOAT, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper3 = new NumberFieldMapper.Builder("field6", NumberFieldMapper.NumberType.HALF_FLOAT, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper4 = new NumberFieldMapper.Builder("field9", NumberFieldMapper.NumberType.HALF_FLOAT, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper5 = new NumberFieldMapper.Builder( + "field10", + NumberFieldMapper.NumberType.HALF_FLOAT, + false, + true + ).build(new Mapper.BuilderContext(settings, new ContentPath())); + MappingLookup fieldMappers = new MappingLookup( + Set.of(numberFieldMapper1, numberFieldMapper2, numberFieldMapper3, numberFieldMapper4, numberFieldMapper5), + Collections.emptyList(), + Collections.emptyList(), + 0, + null + ); + when(documentMapper.mappers()).thenReturn(fieldMappers); + builder = new OnHeapStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument( + new Long[] { 2L, 4L, 3L, 4L }, + new HalfFloatPoint[] { + new HalfFloatPoint("hf1", 12), + new HalfFloatPoint("hf6", 10), + new HalfFloatPoint("field6", 10), + new HalfFloatPoint("field9", 8), + new HalfFloatPoint("field10", 20) } + ); + starTreeDocuments[1] = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, 1L }, + new HalfFloatPoint[] { + new HalfFloatPoint("hf2", 10), + new HalfFloatPoint("hf7", 6), + new HalfFloatPoint("field6", 10), + new HalfFloatPoint("field9", 12), + new HalfFloatPoint("field10", 10) } + ); + starTreeDocuments[2] = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, 1L }, + new HalfFloatPoint[] { + new HalfFloatPoint("hf3", 14), + new HalfFloatPoint("hf8", 12), + new HalfFloatPoint("field6", 10), + new HalfFloatPoint("field9", 6), + new HalfFloatPoint("field10", 24) } + ); + starTreeDocuments[3] = new StarTreeDocument( + new Long[] { 2L, 4L, 3L, 4L }, + new HalfFloatPoint[] { + new HalfFloatPoint("hf4", 9), + new HalfFloatPoint("hf9", 4), + new HalfFloatPoint("field6", 10), + new HalfFloatPoint("field9", 9), + new HalfFloatPoint("field10", 12) } + ); + starTreeDocuments[4] = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, 1L }, + new HalfFloatPoint[] { + new HalfFloatPoint("hf5", 11), + new HalfFloatPoint("hf10", 16), + new HalfFloatPoint("field6", 10), + new HalfFloatPoint("field9", 8), + new HalfFloatPoint("field10", 13) } + ); + + StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + for (int i = 0; i < noOfStarTreeDocuments; i++) { + Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; + for (int j = 0; j < metrics.length; j++) { + metrics[j] = (long) HalfFloatPoint.halfFloatToSortableShort( + ((HalfFloatPoint) starTreeDocuments[i].metrics[j]).numericValue().floatValue() + ); + } + segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); + } + + Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( + segmentStarTreeDocuments, + false + ); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); + List resultStarTreeDocuments = builder.getStarTreeDocuments(); + assertEquals(7, resultStarTreeDocuments.size()); + + Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); + assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); + } + + public void test_build_floatMetrics() throws IOException { + + mapperService = mock(MapperService.class); + DocumentMapper documentMapper = mock(DocumentMapper.class); + when(mapperService.documentMapper()).thenReturn(documentMapper); + Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); + NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("field2", NumberFieldMapper.NumberType.FLOAT, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper2 = new NumberFieldMapper.Builder("field4", NumberFieldMapper.NumberType.FLOAT, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper3 = new NumberFieldMapper.Builder("field6", NumberFieldMapper.NumberType.FLOAT, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper4 = new NumberFieldMapper.Builder("field9", NumberFieldMapper.NumberType.FLOAT, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper5 = new NumberFieldMapper.Builder("field10", NumberFieldMapper.NumberType.FLOAT, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + MappingLookup fieldMappers = new MappingLookup( + Set.of(numberFieldMapper1, numberFieldMapper2, numberFieldMapper3, numberFieldMapper4, numberFieldMapper5), + Collections.emptyList(), + Collections.emptyList(), + 0, + null + ); + when(documentMapper.mappers()).thenReturn(fieldMappers); + builder = new OnHeapStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument( + new Long[] { 2L, 4L, 3L, 4L }, + new Float[] { 12.0F, 10.0F, randomFloat(), 8.0F, 20.0F } + ); + starTreeDocuments[1] = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, 1L }, + new Float[] { 10.0F, 6.0F, randomFloat(), 12.0F, 10.0F } + ); + starTreeDocuments[2] = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, 1L }, + new Float[] { 14.0F, 12.0F, randomFloat(), 6.0F, 24.0F } + ); + starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Float[] { 9.0F, 4.0F, randomFloat(), 9.0F, 12.0F }); + starTreeDocuments[4] = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, 1L }, + new Float[] { 11.0F, 16.0F, randomFloat(), 8.0F, 13.0F } + ); + + StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + for (int i = 0; i < noOfStarTreeDocuments; i++) { + Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; + for (int j = 0; j < metrics.length; j++) { + metrics[j] = (long) NumericUtils.floatToSortableInt((Float) starTreeDocuments[i].metrics[j]); + } + segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); + } + + Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( + segmentStarTreeDocuments, + false + ); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); + + List resultStarTreeDocuments = builder.getStarTreeDocuments(); + assertEquals(7, resultStarTreeDocuments.size()); + + Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); + assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); + } + + public void test_build_longMetrics() throws IOException { + + mapperService = mock(MapperService.class); + DocumentMapper documentMapper = mock(DocumentMapper.class); + when(mapperService.documentMapper()).thenReturn(documentMapper); + Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); + NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("field2", NumberFieldMapper.NumberType.LONG, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper2 = new NumberFieldMapper.Builder("field4", NumberFieldMapper.NumberType.LONG, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper3 = new NumberFieldMapper.Builder("field6", NumberFieldMapper.NumberType.LONG, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper4 = new NumberFieldMapper.Builder("field9", NumberFieldMapper.NumberType.LONG, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper5 = new NumberFieldMapper.Builder("field10", NumberFieldMapper.NumberType.LONG, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + MappingLookup fieldMappers = new MappingLookup( + Set.of(numberFieldMapper1, numberFieldMapper2, numberFieldMapper3, numberFieldMapper4, numberFieldMapper5), + Collections.emptyList(), + Collections.emptyList(), + 0, + null + ); + when(documentMapper.mappers()).thenReturn(fieldMappers); + builder = new OnHeapStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Long[] { 12L, 10L, randomLong(), 8L, 20L }); + starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Long[] { 10L, 6L, randomLong(), 12L, 10L }); + starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Long[] { 14L, 12L, randomLong(), 6L, 24L }); + starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Long[] { 9L, 4L, randomLong(), 9L, 12L }); + starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Long[] { 11L, 16L, randomLong(), 8L, 13L }); + + StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + for (int i = 0; i < noOfStarTreeDocuments; i++) { + Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; + for (int j = 0; j < metrics.length; j++) { + metrics[j] = (Long) starTreeDocuments[i].metrics[j]; + } + segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); + } + + Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( + segmentStarTreeDocuments, + false + ); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); + + List resultStarTreeDocuments = builder.getStarTreeDocuments(); + assertEquals(7, resultStarTreeDocuments.size()); + + Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); + assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); + } + + private static Iterator getExpectedStarTreeDocumentIterator() { + List expectedStarTreeDocuments = List.of( + new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 }), + new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0 }), + new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0 }), + new StarTreeDocument(new Long[] { null, 4L, 3L, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 }), + new StarTreeDocument(new Long[] { null, 4L, null, 1L }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0 }), + new StarTreeDocument(new Long[] { null, 4L, null, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 }), + new StarTreeDocument(new Long[] { null, 4L, null, null }, new Object[] { 56.0, 48.0, 5L, 6.0, 24.0 }) + ); + return expectedStarTreeDocuments.iterator(); + } + + public void test_build() throws IOException { + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 12.0, 10.0, randomDouble(), 8.0, 20.0 }); + starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 12.0, 10.0 }); + starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 14.0, 12.0, randomDouble(), 6.0, 24.0 }); + starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 }); + starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, 16.0, randomDouble(), 8.0, 13.0 }); + + StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + for (int i = 0; i < noOfStarTreeDocuments; i++) { + Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; + for (int j = 0; j < metrics.length; j++) { + metrics[j] = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[j]); + } + segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); + } + + Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( + segmentStarTreeDocuments, + false + ); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); + + List resultStarTreeDocuments = builder.getStarTreeDocuments(); + assertEquals(7, resultStarTreeDocuments.size()); + + Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); + assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); + } + + private void assertStarTreeDocuments( + List resultStarTreeDocuments, + Iterator expectedStarTreeDocumentIterator ) { - return new OnHeapStarTreeBuilder(starTreeField, segmentWriteState, mapperService); + Iterator resultStarTreeDocumentIterator = resultStarTreeDocuments.iterator(); + while (resultStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { + StarTreeDocument resultStarTreeDocument = resultStarTreeDocumentIterator.next(); + StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); + + assertEquals(expectedStarTreeDocument.dimensions[0], resultStarTreeDocument.dimensions[0]); + assertEquals(expectedStarTreeDocument.dimensions[1], resultStarTreeDocument.dimensions[1]); + assertEquals(expectedStarTreeDocument.dimensions[2], resultStarTreeDocument.dimensions[2]); + assertEquals(expectedStarTreeDocument.dimensions[3], resultStarTreeDocument.dimensions[3]); + assertEquals(expectedStarTreeDocument.metrics[0], resultStarTreeDocument.metrics[0]); + assertEquals(expectedStarTreeDocument.metrics[1], resultStarTreeDocument.metrics[1]); + assertEquals(expectedStarTreeDocument.metrics[2], resultStarTreeDocument.metrics[2]); + assertEquals(expectedStarTreeDocument.metrics[3], resultStarTreeDocument.metrics[3]); + assertEquals(expectedStarTreeDocument.metrics[4], resultStarTreeDocument.metrics[4]); + } + } + + public void test_build_starTreeDataset() throws IOException { + + fields = List.of("fieldC", "fieldB", "fieldL", "fieldI"); + + dimensionsOrder = List.of(new NumericDimension("fieldC"), new NumericDimension("fieldB"), new NumericDimension("fieldL")); + metrics = List.of(new Metric("fieldI", List.of(MetricStat.SUM))); + + DocValuesProducer docValuesProducer = mock(DocValuesProducer.class); + + compositeField = new StarTreeField( + "test", + dimensionsOrder, + metrics, + new StarTreeFieldConfiguration(1, Set.of(), StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP) + ); + SegmentInfo segmentInfo = new SegmentInfo( + directory, + Version.LATEST, + Version.LUCENE_9_11_0, + "test_segment", + 7, + false, + false, + new Lucene99Codec(), + new HashMap<>(), + UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), + new HashMap<>(), + null + ); + + fieldsInfo = new FieldInfo[fields.size()]; + fieldProducerMap = new HashMap<>(); + for (int i = 0; i < fieldsInfo.length; i++) { + fieldsInfo[i] = new FieldInfo( + fields.get(i), + i, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + fieldProducerMap.put(fields.get(i), docValuesProducer); + } + FieldInfos fieldInfos = new FieldInfos(fieldsInfo); + writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + + mapperService = mock(MapperService.class); + DocumentMapper documentMapper = mock(DocumentMapper.class); + when(mapperService.documentMapper()).thenReturn(documentMapper); + Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); + NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("fieldI", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + MappingLookup fieldMappers = new MappingLookup( + Set.of(numberFieldMapper1), + Collections.emptyList(), + Collections.emptyList(), + 0, + null + ); + when(documentMapper.mappers()).thenReturn(fieldMappers); + builder = new OnHeapStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + + int noOfStarTreeDocuments = 7; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + starTreeDocuments[0] = new StarTreeDocument(new Long[] { 1L, 11L, 21L }, new Double[] { 400.0 }); + starTreeDocuments[1] = new StarTreeDocument(new Long[] { 1L, 12L, 22L }, new Double[] { 200.0 }); + starTreeDocuments[2] = new StarTreeDocument(new Long[] { 2L, 13L, 23L }, new Double[] { 300.0 }); + starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 13L, 21L }, new Double[] { 100.0 }); + starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 11L, 21L }, new Double[] { 600.0 }); + starTreeDocuments[5] = new StarTreeDocument(new Long[] { 3L, 12L, 23L }, new Double[] { 200.0 }); + starTreeDocuments[6] = new StarTreeDocument(new Long[] { 3L, 12L, 21L }, new Double[] { 400.0 }); + + StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + for (int i = 0; i < noOfStarTreeDocuments; i++) { + long metric1 = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[0]); + segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, new Long[] { metric1 }); + } + + Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( + segmentStarTreeDocuments, + false + ); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); + + List resultStarTreeDocuments = builder.getStarTreeDocuments(); + List expectedStarTreeDocuments = List.of( + new StarTreeDocument(new Long[] { 1L, 11L, 21L }, new Object[] { 400.0 }), + new StarTreeDocument(new Long[] { 1L, 12L, 22L }, new Object[] { 200.0 }), + new StarTreeDocument(new Long[] { 2L, 13L, 21L }, new Object[] { 100.0 }), + new StarTreeDocument(new Long[] { 2L, 13L, 23L }, new Object[] { 300.0 }), + new StarTreeDocument(new Long[] { 3L, 11L, 21L }, new Object[] { 600.0 }), + new StarTreeDocument(new Long[] { 3L, 12L, 21L }, new Object[] { 400.0 }), + new StarTreeDocument(new Long[] { 3L, 12L, 23L }, new Object[] { 200.0 }), + new StarTreeDocument(new Long[] { null, 11L, 21L }, new Object[] { 1000.0 }), + new StarTreeDocument(new Long[] { null, 12L, 21L }, new Object[] { 400.0 }), + new StarTreeDocument(new Long[] { null, 12L, 22L }, new Object[] { 200.0 }), + new StarTreeDocument(new Long[] { null, 12L, 23L }, new Object[] { 200.0 }), + new StarTreeDocument(new Long[] { null, 13L, 21L }, new Object[] { 100.0 }), + new StarTreeDocument(new Long[] { null, 13L, 23L }, new Object[] { 300.0 }), + new StarTreeDocument(new Long[] { null, null, 21L }, new Object[] { 1500.0 }), + new StarTreeDocument(new Long[] { null, null, 22L }, new Object[] { 200.0 }), + new StarTreeDocument(new Long[] { null, null, 23L }, new Object[] { 500.0 }), + new StarTreeDocument(new Long[] { null, null, null }, new Object[] { 2200.0 }), + new StarTreeDocument(new Long[] { null, 12L, null }, new Object[] { 800.0 }), + new StarTreeDocument(new Long[] { null, 13L, null }, new Object[] { 400.0 }), + new StarTreeDocument(new Long[] { 1L, null, 21L }, new Object[] { 400.0 }), + new StarTreeDocument(new Long[] { 1L, null, 22L }, new Object[] { 200.0 }), + new StarTreeDocument(new Long[] { 1L, null, null }, new Object[] { 600.0 }), + new StarTreeDocument(new Long[] { 2L, 13L, null }, new Object[] { 400.0 }), + new StarTreeDocument(new Long[] { 3L, null, 21L }, new Object[] { 1000.0 }), + new StarTreeDocument(new Long[] { 3L, null, 23L }, new Object[] { 200.0 }), + new StarTreeDocument(new Long[] { 3L, null, null }, new Object[] { 1200.0 }), + new StarTreeDocument(new Long[] { 3L, 12L, null }, new Object[] { 600.0 }) + ); + + Iterator expectedStarTreeDocumentIterator = expectedStarTreeDocuments.iterator(); + Iterator resultStarTreeDocumentIterator = resultStarTreeDocuments.iterator(); + while (resultStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { + StarTreeDocument resultStarTreeDocument = resultStarTreeDocumentIterator.next(); + StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); + + assertEquals(expectedStarTreeDocument.dimensions[0], resultStarTreeDocument.dimensions[0]); + assertEquals(expectedStarTreeDocument.dimensions[1], resultStarTreeDocument.dimensions[1]); + assertEquals(expectedStarTreeDocument.dimensions[2], resultStarTreeDocument.dimensions[2]); + assertEquals(expectedStarTreeDocument.metrics[0], resultStarTreeDocument.metrics[0]); + } + + } + + public void testFlushFlow() throws IOException { + List dimList = List.of(0L, 1L, 3L, 4L, 5L); + List docsWithField = List.of(0, 1, 3, 4, 5); + List dimList2 = List.of(0L, 1L, 2L, 3L, 4L, 5L); + List docsWithField2 = List.of(0, 1, 2, 3, 4, 5); + + List metricsList = List.of( + getLongFromDouble(0.0), + getLongFromDouble(10.0), + getLongFromDouble(20.0), + getLongFromDouble(30.0), + getLongFromDouble(40.0), + getLongFromDouble(50.0) + ); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5); + + Dimension d1 = new NumericDimension("field1"); + Dimension d2 = new NumericDimension("field3"); + Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); + Metric m2 = new Metric("field2", List.of(MetricStat.COUNT)); + List dims = List.of(d1, d2); + List metrics = List.of(m1, m2); + StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( + 1000, + new HashSet<>(), + StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP + ); + StarTreeField sf = new StarTreeField("sf", dims, metrics, c); + SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); + SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); + SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); + + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; + Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( + dimDvs, + List.of(new SequentialDocValuesIterator(m1sndv), new SequentialDocValuesIterator(m2sndv)) + ); + /** + * Asserting following dim / metrics [ dim1, dim2 / Sum [metric], count [metric] ] + [0, 0] | [0.0, 1] + [1, 1] | [10.0, 1] + [3, 3] | [30.0, 1] + [4, 4] | [40.0, 1] + [5, 5] | [50.0, 1] + [null, 2] | [20.0, 1] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + assertEquals( + starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 1 * 10.0 : 20.0, + starTreeDocument.metrics[0] + ); + assertEquals(1L, starTreeDocument.metrics[1]); + } + assertEquals(6, count); + } + + public void testMergeFlow() throws IOException { + List dimList = List.of(0L, 1L, 3L, 4L, 5L); + List docsWithField = List.of(0, 1, 3, 4, 5); + List dimList2 = List.of(0L, 1L, 2L, 3L, 4L, 5L); + List docsWithField2 = List.of(0, 1, 2, 3, 4, 5); + + List metricsList = List.of( + getLongFromDouble(0.0), + getLongFromDouble(10.0), + getLongFromDouble(20.0), + getLongFromDouble(30.0), + getLongFromDouble(40.0), + getLongFromDouble(50.0) + ); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5); + + Dimension d1 = new NumericDimension("field1"); + Dimension d2 = new NumericDimension("field3"); + Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); + List dims = List.of(d1, d2); + List metrics = List.of(m1); + StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( + 1000, + new HashSet<>(), + StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP + ); + StarTreeField sf = new StarTreeField("sf", dims, metrics, c); + SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); + SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); + SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); + Map dimDocIdSetIterators = Map.of("field1", d1sndv, "field3", d2sndv); + Map metricDocIdSetIterators = Map.of("field2", m1sndv); + StarTreeValues starTreeValues = new StarTreeValues(sf, null, dimDocIdSetIterators, metricDocIdSetIterators, new HashMap<>()); + + SortedNumericDocValues f2d1sndv = getSortedNumericMock(dimList, docsWithField); + SortedNumericDocValues f2d2sndv = getSortedNumericMock(dimList2, docsWithField2); + SortedNumericDocValues f2m1sndv = getSortedNumericMock(metricsList, metricsWithField); + Map f2dimDocIdSetIterators = Map.of("field1", f2d1sndv, "field3", f2d2sndv); + Map f2metricDocIdSetIterators = Map.of("field2", f2m1sndv); + StarTreeValues starTreeValues2 = new StarTreeValues(sf, null, f2dimDocIdSetIterators, f2metricDocIdSetIterators, new HashMap<>()); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); + + /** + * Asserting following dim / metrics [ dim1, dim2 / Sum [ metric] ] + * [0, 0] | [0.0] + * [1, 1] | [20.0] + * [3, 3] | [60.0] + * [4, 4] | [80.0] + * [5, 5] | [100.0] + * [null, 2] | [40.0] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + assertEquals( + starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 2 * 10.0 : 40.0, + starTreeDocument.metrics[0] + ); + } + assertEquals(6, count); + } + + public void testMergeFlowWithCount() throws IOException { + List dimList = List.of(0L, 1L, 3L, 4L, 5L); + List docsWithField = List.of(0, 1, 3, 4, 5); + List dimList2 = List.of(0L, 1L, 2L, 3L, 4L, 5L); + List docsWithField2 = List.of(0, 1, 2, 3, 4, 5); + + List metricsList = List.of(0L, 1L, 2L, 3L, 4L, 5L); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5); + + Dimension d1 = new NumericDimension("field1"); + Dimension d2 = new NumericDimension("field3"); + Metric m1 = new Metric("field2", List.of(MetricStat.COUNT)); + List dims = List.of(d1, d2); + List metrics = List.of(m1); + StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( + 1000, + new HashSet<>(), + StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP + ); + StarTreeField sf = new StarTreeField("sf", dims, metrics, c); + SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); + SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); + SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); + Map dimDocIdSetIterators = Map.of("field1", d1sndv, "field3", d2sndv); + Map metricDocIdSetIterators = Map.of("field2", m1sndv); + StarTreeValues starTreeValues = new StarTreeValues(sf, null, dimDocIdSetIterators, metricDocIdSetIterators, new HashMap<>()); + + SortedNumericDocValues f2d1sndv = getSortedNumericMock(dimList, docsWithField); + SortedNumericDocValues f2d2sndv = getSortedNumericMock(dimList2, docsWithField2); + SortedNumericDocValues f2m1sndv = getSortedNumericMock(metricsList, metricsWithField); + Map f2dimDocIdSetIterators = Map.of("field1", f2d1sndv, "field3", f2d2sndv); + Map f2metricDocIdSetIterators = Map.of("field2", f2m1sndv); + StarTreeValues starTreeValues2 = new StarTreeValues(sf, null, f2dimDocIdSetIterators, f2metricDocIdSetIterators, new HashMap<>()); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); + /** + * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] + [0, 0] | [0] + [1, 1] | [2] + [3, 3] | [6] + [4, 4] | [8] + [5, 5] | [10] + [null, 2] | [4] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + assertEquals(starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 2 : 4, starTreeDocument.metrics[0]); + } + assertEquals(6, count); + } + + private Long getLongFromDouble(Double num) { + if (num == null) { + return null; + } + return NumericUtils.doubleToSortableLong(num); + } + + private SortedNumericDocValues getSortedNumericMock(List dimList, List docsWithField) { + return new SortedNumericDocValues() { + int index = -1; + + @Override + public long nextValue() throws IOException { + return dimList.get(index); + } + + @Override + public int docValueCount() { + return 0; + } + + @Override + public boolean advanceExact(int target) throws IOException { + return false; + } + + @Override + public int docID() { + return index; + } + + @Override + public int nextDoc() throws IOException { + if (index == docsWithField.size() - 1) { + return NO_MORE_DOCS; + } + index++; + return docsWithField.get(index); + } + + @Override + public int advance(int target) throws IOException { + return 0; + } + + @Override + public long cost() { + return 0; + } + }; + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + dataOut.close(); + metaOut.close(); + directory.close(); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java index 564ab110fa7a5..455cd84bb44f8 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java @@ -8,6 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.index.FieldInfo; @@ -15,6 +16,7 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.Version; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; @@ -45,9 +47,13 @@ public class StarTreesBuilderTests extends OpenSearchTestCase { private StarTreeField starTreeField; private Map fieldProducerMap; private Directory directory; + private IndexOutput dataOut; + private IndexOutput metaOut; public void setUp() throws Exception { super.setUp(); + metaOut = mock(IndexOutput.class); + dataOut = mock(IndexOutput.class); mapperService = mock(MapperService.class); directory = newFSDirectory(createTempDir()); SegmentInfo segmentInfo = new SegmentInfo( @@ -89,7 +95,7 @@ public void test_buildWithNoStarTreeFields() throws IOException { when(mapperService.getCompositeFieldTypes()).thenReturn(new HashSet<>()); StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - starTreesBuilder.build(fieldProducerMap); + starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, mock(DocValuesConsumer.class)); verifyNoInteractions(docValuesProducer); } @@ -97,7 +103,7 @@ public void test_buildWithNoStarTreeFields() throws IOException { public void test_getStarTreeBuilder() throws IOException { when(mapperService.getCompositeFieldTypes()).thenReturn(Set.of(starTreeFieldType)); StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - StarTreeBuilder starTreeBuilder = starTreesBuilder.getSingleTreeBuilder(starTreeField, segmentWriteState, mapperService); + StarTreeBuilder starTreeBuilder = starTreesBuilder.getSingleTreeBuilder(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); assertTrue(starTreeBuilder instanceof OnHeapStarTreeBuilder); } @@ -106,7 +112,7 @@ public void test_getStarTreeBuilder_illegalArgument() { StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration(1, new HashSet<>(), StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP); StarTreeField starTreeField = new StarTreeField("star_tree", new ArrayList<>(), new ArrayList<>(), starTreeFieldConfiguration); StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - assertThrows(IllegalArgumentException.class, () -> starTreesBuilder.getSingleTreeBuilder(starTreeField, segmentWriteState, mapperService)); + assertThrows(IllegalArgumentException.class, () -> starTreesBuilder.getSingleTreeBuilder(metaOut, dataOut, starTreeField, segmentWriteState, mapperService)); } public void test_closeWithNoStarTreeFields() throws IOException { diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java new file mode 100644 index 0000000000000..cae4ad00c9693 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java @@ -0,0 +1,218 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.meta; + +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.Version; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; +import org.opensearch.index.fielddata.IndexNumericFieldData; +import org.opensearch.index.mapper.CompositeMappedFieldType; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.UUID; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; +import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE; + +public class StarTreeMetaTests extends OpenSearchTestCase { + + private IndexOutput metaOut; + private IndexInput metaIn; + private StarTreeField starTreeField; + private SegmentWriteState writeState; + private Directory directory; + private FieldInfo[] fieldsInfo; + private List dimensionsOrder; + private List fields = List.of(); + private List metrics; + private List metricAggregatorInfos = new ArrayList<>(); + private int segmentDocumentCount; + private long dataFilePointer; + private long dataFileLength; + + @Before + public void setup() throws IOException { + fields = List.of("field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8", "field9", "field10"); + directory = newFSDirectory(createTempDir()); + SegmentInfo segmentInfo = new SegmentInfo( + directory, + Version.LATEST, + Version.LUCENE_9_11_0, + "test_segment", + 6, + false, + false, + new Lucene99Codec(), + new HashMap<>(), + UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), + new HashMap<>(), + null + ); + + fieldsInfo = new FieldInfo[fields.size()]; + for (int i = 0; i < fieldsInfo.length; i++) { + fieldsInfo[i] = new FieldInfo( + fields.get(i), + i, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + FieldInfos fieldInfos = new FieldInfos(fieldsInfo); + writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + } + + public void test_starTreeMetadata() throws IOException { + dimensionsOrder = List.of( + new NumericDimension("field1"), + new NumericDimension("field3"), + new NumericDimension("field5"), + new NumericDimension("field8") + ); + metrics = List.of( + new Metric("field2", List.of(MetricStat.SUM)), + new Metric("field4", List.of(MetricStat.SUM)), + new Metric("field6", List.of(MetricStat.COUNT)), + new Metric("field9", List.of(MetricStat.MIN)), + new Metric("field10", List.of(MetricStat.MAX)) + ); + int maxLeafDocs = randomNonNegativeInt(); + StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( + maxLeafDocs, + new HashSet<>(), + StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP + ); + starTreeField = new StarTreeField("star_tree", dimensionsOrder, metrics, starTreeFieldConfiguration); + + for (Metric metric : metrics) { + for (MetricStat metricType : metric.getMetrics()) { + MetricAggregatorInfo metricAggregatorInfo = new MetricAggregatorInfo( + metricType, + metric.getField(), + starTreeField.getName(), + IndexNumericFieldData.NumericType.DOUBLE + ); + metricAggregatorInfos.add(metricAggregatorInfo); + } + } + + dataFileLength = randomNonNegativeLong(); + dataFilePointer = randomNonNegativeLong(); + segmentDocumentCount = randomNonNegativeInt(); + metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT); + StarTreeUtils.writeStarTreeMetadata( + metaOut, + starTreeField, + writeState, + metricAggregatorInfos, + segmentDocumentCount, + dataFilePointer, + dataFileLength + ); + metaOut.close(); + metaIn = directory.openInput("star-tree-metadata", IOContext.READONCE); + assertEquals(MAGIC_MARKER, metaIn.readVLong()); + assertEquals(VERSION, metaIn.readVInt()); + + String compositeFieldName = metaIn.readString(); + CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName( + metaIn.readString() + ); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata(metaIn, compositeFieldName, compositeFieldType); + assertEquals(starTreeField.getName(), starTreeMetadata.getCompositeFieldName()); + assertEquals(STAR_TREE, starTreeMetadata.getCompositeFieldType()); + + assertNotNull(starTreeMetadata); + + for (int i = 0; i < dimensionsOrder.size(); i++) { + assertEquals( + writeState.fieldInfos.fieldInfo(dimensionsOrder.get(i).getField()).getFieldNumber(), + starTreeMetadata.getDimensionFieldNumbers().get(i), + 0 + ); + } + + for (int i = 0; i < metricAggregatorInfos.size(); i++) { + MetricEntry metricEntry = starTreeMetadata.getMetricEntries().get(i); + assertEquals( + metricAggregatorInfos.get(i).getField(), + writeState.fieldInfos.fieldInfo(metricEntry.getMetricFieldNumber()).getName() + ); + assertEquals(metricAggregatorInfos.get(i).getMetricStat(), MetricStat.fromMetricOrdinal(metricEntry.getMetricStatOrdinal())); + } + assertEquals(segmentDocumentCount, starTreeMetadata.getSegmentAggregatedDocCount(), 0); + assertEquals(maxLeafDocs, starTreeMetadata.getMaxLeafDocs(), 0); + assertEquals( + starTreeFieldConfiguration.getSkipStarNodeCreationInDims().size(), + starTreeMetadata.getSkipStarNodeCreationInDims().size() + ); + for (String skipStarNodeCreationInDims : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) { + Integer skipStarNodeCreationInDimsFieldNumber = writeState.fieldInfos.fieldInfo(skipStarNodeCreationInDims).getFieldNumber(); + assertTrue(starTreeMetadata.getSkipStarNodeCreationInDims().contains(skipStarNodeCreationInDimsFieldNumber)); + } + assertEquals(starTreeFieldConfiguration.getBuildMode(), starTreeMetadata.getStarTreeBuildMode()); + assertEquals(dataFileLength, starTreeMetadata.getDataLength()); + assertEquals(dataFilePointer, starTreeMetadata.getDataStartFilePointer()); + + metaIn.close(); + + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + metaOut.close(); + metaIn.close(); + directory.close(); + } + +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java new file mode 100644 index 0000000000000..c511aeb226890 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java @@ -0,0 +1,208 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; +import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Queue; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class FixedLengthStarTreeNodeTests extends OpenSearchTestCase { + + private IndexOutput dataOut; + private IndexInput dataIn; + private Directory directory; + + @Before + public void setup() throws IOException { + directory = newFSDirectory(createTempDir()); + } + + public void test_StarTreeNode() throws IOException { + + dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT); + Map levelOrderStarTreeNodeMap = new LinkedHashMap<>(); + TreeNode root = generateSampleTree(levelOrderStarTreeNodeMap); + long starTreeDataLength = StarTreeUtils.writeStarTree(dataOut, root, 7, "star-tree"); + + //asserting on the actual length of the star tree data file + assertEquals(starTreeDataLength, 247); + dataOut.close(); + + dataIn = directory.openInput("star-tree-data", IOContext.READONCE); + + StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class); + when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength); + when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L); + StarTree starTree = new StarTree(dataIn, starTreeMetadata); + + StarTreeNode starTreeNode = starTree.getRoot(); + Queue queue = new ArrayDeque<>(); + queue.add(starTreeNode); + + while ((starTreeNode = queue.poll()) != null) { + + // verify the star node + assertStarTreeNode(starTreeNode, levelOrderStarTreeNodeMap.get(starTreeNode.getDimensionValue())); + + Iterator childrenIterator = starTreeNode.getChildrenIterator(); + + if (starTreeNode.getChildDimensionId() != -1) { + while (childrenIterator.hasNext()) { + StarTreeNode child = childrenIterator.next(); + assertStarTreeNode( + starTreeNode.getChildForDimensionValue(child.getDimensionValue(), false), + levelOrderStarTreeNodeMap.get(child.getDimensionValue()) + ); + queue.add(child); + } + } + } + + dataIn.close(); + + } + + private void assertStarTreeNode(StarTreeNode starTreeNode, TreeNode treeNode) throws IOException { + assertEquals(starTreeNode.getDimensionId(), treeNode.dimensionId); + assertEquals(starTreeNode.getDimensionValue(), treeNode.dimensionValue); + assertEquals(starTreeNode.getStartDocId(), treeNode.startDocId); + assertEquals(starTreeNode.getEndDocId(), treeNode.endDocId); + assertEquals(starTreeNode.getChildDimensionId(), treeNode.childDimensionId); + assertEquals(starTreeNode.getAggregatedDocId(), treeNode.aggregatedDocId); + + if (starTreeNode.getChildDimensionId() != -1) { + assertFalse(starTreeNode.isLeaf()); + if (treeNode.children != null) { + assertEquals(starTreeNode.getNumChildren(), treeNode.children.values().size()); + } + } else { + assertTrue(starTreeNode.isLeaf()); + } + + } + + private TreeNode generateSampleTree(Map levelOrderStarTreeNode) { + // Create the root node + TreeNode root = new TreeNode(); + root.dimensionId = 0; + root.startDocId = 0; + root.endDocId = 100; + root.childDimensionId = 1; + root.aggregatedDocId = randomInt(); + root.nodeType = randomByte(); + root.children = new HashMap<>(); + + levelOrderStarTreeNode.put(root.dimensionValue, root); + + // Create child nodes for dimension 1 + TreeNode dim1Node1 = new TreeNode(); + dim1Node1.dimensionId = 1; + dim1Node1.dimensionValue = 1; + dim1Node1.startDocId = 0; + dim1Node1.endDocId = 50; + dim1Node1.childDimensionId = 2; + dim1Node1.aggregatedDocId = randomInt(); + dim1Node1.nodeType = randomByte(); + dim1Node1.children = new HashMap<>(); + + TreeNode dim1Node2 = new TreeNode(); + dim1Node2.dimensionId = 1; + dim1Node2.dimensionValue = 2; + dim1Node2.startDocId = 50; + dim1Node2.endDocId = 100; + dim1Node2.childDimensionId = 2; + dim1Node2.aggregatedDocId = randomInt(); + dim1Node2.nodeType = randomByte(); + dim1Node2.children = new HashMap<>(); + + root.children.put(1L, dim1Node1); + root.children.put(2L, dim1Node2); + + levelOrderStarTreeNode.put(dim1Node1.dimensionValue, dim1Node1); + levelOrderStarTreeNode.put(dim1Node2.dimensionValue, dim1Node2); + + // Create child nodes for dimension 2 + TreeNode dim2Node1 = new TreeNode(); + dim2Node1.dimensionId = 2; + dim2Node1.dimensionValue = 3; + dim2Node1.startDocId = 0; + dim2Node1.endDocId = 25; + dim2Node1.childDimensionId = -1; + dim2Node1.aggregatedDocId = randomInt(); + dim2Node1.nodeType = randomByte(); + dim2Node1.children = null; + + TreeNode dim2Node2 = new TreeNode(); + dim2Node2.dimensionId = 2; + dim2Node2.dimensionValue = 4; + dim2Node2.startDocId = 25; + dim2Node2.endDocId = 50; + dim2Node2.childDimensionId = -1; + dim2Node2.aggregatedDocId = randomInt(); + dim2Node2.nodeType = randomByte(); + dim2Node2.children = null; + + TreeNode dim2Node3 = new TreeNode(); + dim2Node3.dimensionId = 2; + dim2Node3.dimensionValue = 5; + dim2Node3.startDocId = 50; + dim2Node3.endDocId = 75; + dim2Node3.childDimensionId = -1; + dim2Node3.aggregatedDocId = randomInt(); + dim2Node3.nodeType = randomByte(); + dim2Node3.children = null; + + TreeNode dim2Node4 = new TreeNode(); + dim2Node4.dimensionId = 2; + dim2Node4.dimensionValue = 6; + dim2Node4.startDocId = 75; + dim2Node4.endDocId = 100; + dim2Node4.childDimensionId = -1; + dim2Node4.aggregatedDocId = randomInt(); + dim2Node4.nodeType = randomByte(); + dim2Node4.children = null; + + dim1Node1.children.put(3L, dim2Node1); + dim1Node1.children.put(4L, dim2Node2); + dim1Node2.children.put(5L, dim2Node3); + dim1Node2.children.put(6L, dim2Node4); + + levelOrderStarTreeNode.put(dim2Node1.dimensionValue, dim2Node1); + levelOrderStarTreeNode.put(dim2Node2.dimensionValue, dim2Node2); + levelOrderStarTreeNode.put(dim2Node3.dimensionValue, dim2Node3); + levelOrderStarTreeNode.put(dim2Node4.dimensionValue, dim2Node4); + + return root; + } + + public void tearDown() throws Exception { + super.tearDown(); + dataIn.close(); + dataOut.close(); + directory.close(); + } + +} diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java index 6afc7c23d9e66..e07d7ef3c5a7b 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java @@ -815,6 +815,14 @@ public static long randomNonNegativeLong() { return randomLong == Long.MIN_VALUE ? 0 : Math.abs(randomLong); } + /** + * @return a int between 0 and Integer.MAX_VALUE (inclusive) chosen uniformly at random. + */ + public static int randomNonNegativeInt() { + int randomInt = randomInt(); + return randomInt == Integer.MIN_VALUE ? 0 : Math.abs(randomInt); + } + public static float randomFloat() { return random().nextFloat(); } From 6d9052ce4c7af0f6beb6205fee50bd0271426edd Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Thu, 25 Jul 2024 11:16:03 +0530 Subject: [PATCH 2/3] refactored meta, test fixes Signed-off-by: Sarthak Aggarwal --- .../composite/Composite99DocValuesReader.java | 43 +- .../CompositeIndexConstants.java | 2 +- .../aggregators/CountValueAggregator.java | 12 +- .../aggregators/MaxValueAggregator.java | 0 .../aggregators/SumValueAggregator.java | 6 +- .../startree/builder/BaseStarTreeBuilder.java | 24 +- .../builder/OnHeapStarTreeBuilder.java | 5 +- .../datacube/startree/meta/MetricEntry.java | 20 +- .../startree/meta/StarTreeMetadata.java | 64 +- .../datacube/startree/meta/TreeMetadata.java | 111 -- .../node/FixedLengthStarTreeNode.java | 13 +- .../datacube/startree/node/StarTree.java | 10 +- .../utils/SequentialDocValuesIterator.java | 2 +- .../startree/utils/StarTreeDataWriter.java | 2 +- .../startree/utils/StarTreeMetaWriter.java | 12 +- .../StarTreeDocValuesFormatTests.java | 4 +- .../builder/AbstractStarTreeBuilderTests.java | 84 +- .../builder/OnHeapStarTreeBuilderTests.java | 1074 +---------------- .../startree/meta/StarTreeMetaTests.java | 22 +- .../node/FixedLengthStarTreeNodeTests.java | 16 +- 20 files changed, 191 insertions(+), 1335 deletions(-) delete mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java delete mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java index 8a2f08b9cb898..795af2cd379e9 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java @@ -32,7 +32,6 @@ import org.opensearch.index.compositeindex.CompositeIndexMetadata; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; -import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.ReadDimension; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; @@ -46,7 +45,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -125,7 +123,7 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r while (true) { // validate magic marker - long magicMarker = metaIn.readVLong(); + long magicMarker = metaIn.readLong(); if (magicMarker == -1) { logger.info("EOF reached for composite index metadata"); break; @@ -161,24 +159,19 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput); compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata); - List dimensionFieldNumbers = starTreeMetadata.getDimensionFieldNumbers(); + List dimensionFields = starTreeMetadata.getDimensionFields(); // generating star tree unique fields (fully qualified name for dimension and metrics) - for (Integer fieldNumber : dimensionFieldNumbers) { - fields.add( - fullyQualifiedFieldNameForStarTreeDimensionsDocValues( - compositeFieldName, - readState.fieldInfos.fieldInfo(fieldNumber).getName() - ) - ); + for (String dimensions : dimensionFields) { + fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions)); } for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { fields.add( fullyQualifiedFieldNameForStarTreeMetricsDocValues( compositeFieldName, - readState.fieldInfos.fieldInfo(metricEntry.getMetricFieldNumber()).getName(), - MetricStat.fromMetricOrdinal(metricEntry.getMetricStatOrdinal()).getTypeName() + metricEntry.getMetricFieldName(), + metricEntry.getMetricStat().getTypeName() ) ); } @@ -285,28 +278,20 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp StarTreeMetadata starTreeMetadata = (StarTreeMetadata) compositeIndexMetadataMap.get(compositeIndexFieldInfo.getField()); // build skip star node dimensions - Set skipStarNodeCreationInDimsFieldNumbers = starTreeMetadata.getSkipStarNodeCreationInDims(); - Set skipStarNodeCreationInDims = new HashSet<>(); - for (Integer fieldNumber : skipStarNodeCreationInDimsFieldNumbers) { - skipStarNodeCreationInDims.add(readState.fieldInfos.fieldInfo(fieldNumber).getName()); - } + Set skipStarNodeCreationInDims = starTreeMetadata.getSkipStarNodeCreationInDims(); // build dimensions - List dimensionFieldNumbers = starTreeMetadata.getDimensionFieldNumbers(); - List dimensions = new ArrayList<>(); List readDimensions = new ArrayList<>(); - for (Integer fieldNumber : dimensionFieldNumbers) { - dimensions.add(readState.fieldInfos.fieldInfo(fieldNumber).getName()); - readDimensions.add(new ReadDimension(readState.fieldInfos.fieldInfo(fieldNumber).name)); + for (String dimension : starTreeMetadata.getDimensionFields()) { + readDimensions.add(new ReadDimension(dimension)); } // build metrics Map starTreeMetricMap = new LinkedHashMap<>(); for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { - String metricName = readState.fieldInfos.fieldInfo(metricEntry.getMetricFieldNumber()).getName(); - + String metricName = metricEntry.getMetricFieldName(); Metric metric = starTreeMetricMap.computeIfAbsent(metricName, field -> new Metric(field, new ArrayList<>())); - metric.getMetrics().add(MetricStat.fromMetricOrdinal(metricEntry.getMetricStatOrdinal())); + metric.getMetrics().add(metricEntry.getMetricStat()); } List starTreeMetrics = new ArrayList<>(starTreeMetricMap.values()); @@ -332,7 +317,7 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp Map metricsDocIdSetIteratorMap = new LinkedHashMap<>(); // get doc id set iterators for dimensions - for (String dimension : dimensions) { + for (String dimension : starTreeMetadata.getDimensionFields()) { dimensionsDocIdSetIteratorMap.put( dimension, compositeDocValuesProducer.getSortedNumeric( @@ -345,8 +330,8 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( starTreeField.getName(), - readState.fieldInfos.fieldInfo(metricEntry.getMetricFieldNumber()).getName(), - MetricStat.fromMetricOrdinal(metricEntry.getMetricStatOrdinal()).getTypeName() + metricEntry.getMetricFieldName(), + metricEntry.getMetricStat().getTypeName() ); metricsDocIdSetIteratorMap.put(metricFullName, compositeDocValuesProducer.getSortedNumeric(metricFullName)); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java index 1b357254cfcc4..2c3cd06da13da 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java @@ -26,6 +26,6 @@ public class CompositeIndexConstants { /** * Represents the key to fetch number of documents in a segment. */ - public static final String SEGMENT_DOCS_COUNT = "segment_docs_count"; + public static final String SEGMENT_DOCS_COUNT = "segmentDocsCount"; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java index 0eb5614e829ca..bedd6a4b374b4 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java @@ -46,7 +46,17 @@ public Long mergeAggregatedValueAndSegmentValue(Long value, Long segmentDocValue @Override public Long mergeAggregatedValues(Long value, Long aggregatedValue) { - return value + aggregatedValue; + + long totalCount = getIdentityMetricValue(); + if (value != null) { + totalCount += value; + } + + if (aggregatedValue != null) { + totalCount += aggregatedValue; + } + + return totalCount; } @Override diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java index c76b668d2ba3e..730adae49fbed 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java @@ -63,7 +63,11 @@ public Double mergeAggregatedValueAndSegmentValue(Double value, Long segmentDocV public Double mergeAggregatedValues(Double value, Double aggregatedValue) { assert kahanSummation.value() == aggregatedValue; kahanSummation.reset(sum, compensation); - kahanSummation.add(value); + if (value != null) { + kahanSummation.add(value); + } else { + kahanSummation.add(getIdentityMetricValue()); + } compensation = kahanSummation.delta(); sum = kahanSummation.value(); return kahanSummation.value(); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index 7cc0360f76e32..72e1152154f1d 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -23,6 +23,7 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.Counter; import org.apache.lucene.util.NumericUtils; +import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -31,7 +32,6 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; import org.opensearch.index.compositeindex.datacube.startree.aggregators.ValueAggregator; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; @@ -90,7 +90,6 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder { private final IndexOutput metaOut; private final IndexOutput dataOut; - static String NUM_SEGMENT_DOCS = "numSegmentDocs"; /** * Reads all the configuration related to dimensions and metrics, builds a star-tree based on the different construction parameters. @@ -179,6 +178,9 @@ public List getMetricReaders(SegmentWriteState stat SequentialDocValuesIterator metricReader; FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField()); if (metricStat != MetricStat.COUNT) { + if (metricFieldInfo == null) { + metricFieldInfo = getFieldInfo(metric.getField()); + } metricReader = new SequentialDocValuesIterator( fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo) ); @@ -250,7 +252,7 @@ public void build( if (numStarTreeDocs == 0) { // serialize the star tree data - serializeStarTree(numSegmentStarTreeDocument); + serializeStarTree(numStarTreeDocument); return; } @@ -270,7 +272,7 @@ public void build( createSortedDocValuesIndices(starTreeDocValuesConsumer, fieldNumberAcrossStarTrees); // serialize star-tree - serializeStarTree(numSegmentStarTreeDocument); + serializeStarTree(numStarTreeDocument); } private void serializeStarTree(int numSegmentStarTreeDocument) throws IOException { @@ -335,10 +337,15 @@ private void createSortedDocValuesIndices(DocValuesConsumer docValuesConsumer, A try { switch (metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType()) { case LONG: - metricWriters.get(i).addValue(docId, (Long) starTreeDocument.metrics[i]); + if (starTreeDocument.metrics[i] != null) { + metricWriters.get(i).addValue(docId, (Long) starTreeDocument.metrics[i]); + } break; case DOUBLE: - metricWriters.get(i).addValue(docId, NumericUtils.doubleToSortableLong((Double) starTreeDocument.metrics[i])); + if (starTreeDocument.metrics[i] != null) { + metricWriters.get(i) + .addValue(docId, NumericUtils.doubleToSortableLong((Double) starTreeDocument.metrics[i])); + } break; default: throw new IllegalStateException("Unknown metric doc value type"); @@ -528,7 +535,9 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( if (isMerge) { metrics[i] = metricValueAggregator.getInitialAggregatedValue(segmentDocument.metrics[i]); } else { - metrics[i] = metricValueAggregator.getInitialAggregatedValueForSegmentDocValue(getLong(segmentDocument.metrics[i], metricValueAggregator.getIdentityMetricValue())); + metrics[i] = metricValueAggregator.getInitialAggregatedValueForSegmentDocValue( + getLong(segmentDocument.metrics[i], metricValueAggregator.getIdentityMetricValue()) + ); } } catch (Exception e) { @@ -626,6 +635,7 @@ public StarTreeDocument reduceStarTreeDocuments(StarTreeDocument aggregatedDocum return aggregatedDocument; } } + private static FieldInfo getFieldInfo(String field) { return new FieldInfo( field, diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java index fec001922edc2..194be5a2928ae 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java @@ -28,6 +28,8 @@ import java.util.Objects; import java.util.concurrent.atomic.AtomicInteger; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.SEGMENT_DOCS_COUNT; + /** * On heap single tree builder * @@ -140,10 +142,9 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal metricReaders.add(new SequentialDocValuesIterator(metricDocValuesEntry.getValue())); } - boolean endOfDoc = false; int currentDocId = 0; int numSegmentDocs = Integer.parseInt( - starTreeValues.getAttributes().getOrDefault(NUM_SEGMENT_DOCS, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) + starTreeValues.getAttributes().getOrDefault(SEGMENT_DOCS_COUNT, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) ); while (currentDocId < numSegmentDocs) { Long[] dims = new Long[dimensionsSplitOrder.size()]; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java index 5184dc57174b2..5d3717dc65670 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java @@ -8,6 +8,8 @@ package org.opensearch.index.compositeindex.datacube.startree.meta; +import org.opensearch.index.compositeindex.datacube.MetricStat; + /** * Holds the pair of metric name and it's associated stat * @@ -15,19 +17,19 @@ */ public class MetricEntry { - private final int metricFieldNumber; - private final int metricStatOrdinal; + private final String metricFieldName; + private final MetricStat metricStat; - public MetricEntry(int metricFieldNumber, int metricStatOrdinal) { - this.metricFieldNumber = metricFieldNumber; - this.metricStatOrdinal = metricStatOrdinal; + public MetricEntry(String metricFieldName, MetricStat metricStat) { + this.metricFieldName = metricFieldName; + this.metricStat = metricStat; } - public int getMetricFieldNumber() { - return metricFieldNumber; + public String getMetricFieldName() { + return metricFieldName; } - public int getMetricStatOrdinal() { - return metricStatOrdinal; + public MetricStat getMetricStat() { + return metricStat; } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java index 2dc306d651727..2019ef912ec04 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java @@ -12,6 +12,7 @@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.store.IndexInput; import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.mapper.CompositeMappedFieldType; @@ -26,16 +27,16 @@ * * @opensearch.experimental */ -public class StarTreeMetadata extends CompositeIndexMetadata implements TreeMetadata { - private static final Logger logger = LogManager.getLogger(TreeMetadata.class); +public class StarTreeMetadata extends CompositeIndexMetadata { + private static final Logger logger = LogManager.getLogger(StarTreeMetadata.class); private final IndexInput meta; private final String starTreeFieldName; private final String starTreeFieldType; - private final List dimensionFieldNumbers; + private final List dimensionFields; private final List metricEntries; private final Integer segmentAggregatedDocCount; private final Integer maxLeafDocs; - private final Set skipStarNodeCreationInDims; + private final Set skipStarNodeCreationInDims; private final StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode; private final long dataStartFilePointer; private final long dataLength; @@ -47,7 +48,7 @@ public StarTreeMetadata(IndexInput meta, String compositeFieldName, CompositeMap try { this.starTreeFieldName = this.getCompositeFieldName(); this.starTreeFieldType = this.getCompositeFieldType().getName(); - this.dimensionFieldNumbers = readStarTreeDimensions(); + this.dimensionFields = readStarTreeDimensions(); this.metricEntries = readMetricEntries(); this.segmentAggregatedDocCount = readSegmentAggregatedDocCount(); this.maxLeafDocs = readMaxLeafDocs(); @@ -61,80 +62,69 @@ public StarTreeMetadata(IndexInput meta, String compositeFieldName, CompositeMap } } - @Override - public int readDimensionsCount() throws IOException { + private int readDimensionsCount() throws IOException { return meta.readVInt(); } - @Override - public List readStarTreeDimensions() throws IOException { + private List readStarTreeDimensions() throws IOException { int dimensionCount = readDimensionsCount(); - List dimensionFieldNumbers = new ArrayList<>(); + List dimensionFields = new ArrayList<>(); for (int i = 0; i < dimensionCount; i++) { - dimensionFieldNumbers.add(meta.readVInt()); + dimensionFields.add(meta.readString()); } - return dimensionFieldNumbers; + return dimensionFields; } - @Override - public int readMetricsCount() throws IOException { + private int readMetricsCount() throws IOException { return meta.readVInt(); } - @Override - public List readMetricEntries() throws IOException { + private List readMetricEntries() throws IOException { int metricCount = readMetricsCount(); List metricEntries = new ArrayList<>(); for (int i = 0; i < metricCount; i++) { - int metricFieldNumber = meta.readVInt(); + String metricFieldName = meta.readString(); int metricStatOrdinal = meta.readVInt(); - metricEntries.add(new MetricEntry(metricFieldNumber, metricStatOrdinal)); + metricEntries.add(new MetricEntry(metricFieldName, MetricStat.fromMetricOrdinal(metricStatOrdinal))); } return metricEntries; } - @Override - public int readSegmentAggregatedDocCount() throws IOException { + private int readSegmentAggregatedDocCount() throws IOException { return meta.readVInt(); } - @Override - public int readMaxLeafDocs() throws IOException { + private int readMaxLeafDocs() throws IOException { return meta.readVInt(); } - @Override - public int readSkipStarNodeCreationInDimsCount() throws IOException { + private int readSkipStarNodeCreationInDimsCount() throws IOException { return meta.readVInt(); } - @Override - public Set readSkipStarNodeCreationInDims() throws IOException { + private Set readSkipStarNodeCreationInDims() throws IOException { int skipStarNodeCreationInDimsCount = readSkipStarNodeCreationInDimsCount(); - Set skipStarNodeCreationInDims = new HashSet<>(); + Set skipStarNodeCreationInDims = new HashSet<>(); for (int i = 0; i < skipStarNodeCreationInDimsCount; i++) { - skipStarNodeCreationInDims.add(meta.readVInt()); + skipStarNodeCreationInDims.add(meta.readString()); } return skipStarNodeCreationInDims; } - @Override - public StarTreeFieldConfiguration.StarTreeBuildMode readBuildMode() throws IOException { + private StarTreeFieldConfiguration.StarTreeBuildMode readBuildMode() throws IOException { return StarTreeFieldConfiguration.StarTreeBuildMode.fromBuildModeOrdinal(meta.readByte()); } - @Override - public long readDataStartFilePointer() throws IOException { + private long readDataStartFilePointer() throws IOException { return meta.readVLong(); } - @Override - public long readDataLength() throws IOException { + private long readDataLength() throws IOException { return meta.readVLong(); } @@ -161,8 +151,8 @@ public String getStarTreeFieldType() { * * @return star-tree dimension field numbers */ - public List getDimensionFieldNumbers() { - return dimensionFieldNumbers; + public List getDimensionFields() { + return dimensionFields; } /** @@ -197,7 +187,7 @@ public Integer getMaxLeafDocs() { * * @return the set of dimensions. */ - public Set getSkipStarNodeCreationInDims() { + public Set getSkipStarNodeCreationInDims() { return skipStarNodeCreationInDims; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java deleted file mode 100644 index fa2dc8ad22562..0000000000000 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.compositeindex.datacube.startree.meta; - -import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; - -import java.io.IOException; -import java.util.List; -import java.util.Set; - -/** - * An interface for metadata of the star-tree - * - * @opensearch.experimental - */ -public interface TreeMetadata { - - /** - * Reads the count of dimensions in the star-tree. - * - * @return the count of dimensions - * @throws IOException if an I/O error occurs while reading the dimensions count - */ - int readDimensionsCount() throws IOException; - - /** - * Reads the list of dimension ordinals in the star-tree. - * - * @return the list of dimension ordinals - * @throws IOException if an I/O error occurs while reading the dimension ordinals - */ - List readStarTreeDimensions() throws IOException; - - /** - * Reads the count of metrics in the star-tree. - * - * @return the count of metrics - * @throws IOException if an I/O error occurs while reading the metrics count - */ - int readMetricsCount() throws IOException; - - /** - * Reads the list of metric entries in the star-tree. - * - * @return the list of metric entries - * @throws IOException if an I/O error occurs while reading the metric entries - */ - List readMetricEntries() throws IOException; - - /** - * Reads the aggregated document count for the segment in the star-tree. - * - * @return the aggregated document count for the segment - * @throws IOException if an I/O error occurs while reading the aggregated document count - */ - int readSegmentAggregatedDocCount() throws IOException; - - /** - * Reads the max leaf docs for the star-tree. - * - * @return the max leaf docs for the star-tree - * @throws IOException if an I/O error occurs while reading the max leaf docs - */ - int readMaxLeafDocs() throws IOException; - - /** - * Reads the count of dimensions where star node will not be created in the star-tree. - * - * @return the count of dimensions - * @throws IOException if an I/O error occurs while reading the skip star node dimensions count - */ - int readSkipStarNodeCreationInDimsCount() throws IOException; - - /** - * Reads the list of dimensions field numbers to be skipped for star node creation in the star-tree. - * - * @return the set of dimensions field numbers to be skipped for star node creation. - * @throws IOException if an I/O error occurs while reading the dimensions - */ - Set readSkipStarNodeCreationInDims() throws IOException; - - /** - * Reads the build mode for the star-tree. - * - * @return the star-tree build mode - * @throws IOException if an I/O error occurs while reading the build mode - */ - StarTreeFieldConfiguration.StarTreeBuildMode readBuildMode() throws IOException; - - /** - * Reads the file pointer to the start of the star-tree data. - * - * @return the file pointer to the start of the star-tree data - * @throws IOException if an I/O error occurs while reading the star-tree data start file pointer - */ - long readDataStartFilePointer() throws IOException; - - /** - * Reads the length of the data of the star-tree. - * - * @return the length of the data - * @throws IOException if an I/O error occurs while reading the data length - */ - long readDataLength() throws IOException; -} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java index 4c22e1a2d19dc..4edb45762084d 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java @@ -30,8 +30,8 @@ public class FixedLengthStarTreeNode implements StarTreeNode { private static final int START_DOC_ID_OFFSET = DIMENSION_VALUE_OFFSET + Long.BYTES; private static final int END_DOC_ID_OFFSET = START_DOC_ID_OFFSET + Integer.BYTES; private static final int AGGREGATE_DOC_ID_OFFSET = END_DOC_ID_OFFSET + Integer.BYTES; - private static final int STAR_NODE_TYPE_OFFSET = AGGREGATE_DOC_ID_OFFSET + Byte.BYTES; - private static final int FIRST_CHILD_ID_OFFSET = STAR_NODE_TYPE_OFFSET + Integer.BYTES; + private static final int STAR_NODE_TYPE_OFFSET = AGGREGATE_DOC_ID_OFFSET + Integer.BYTES; + private static final int FIRST_CHILD_ID_OFFSET = STAR_NODE_TYPE_OFFSET + Byte.BYTES; private static final int LAST_CHILD_ID_OFFSET = FIRST_CHILD_ID_OFFSET + Integer.BYTES; public static final int INVALID_ID = -1; @@ -47,6 +47,15 @@ public FixedLengthStarTreeNode(RandomAccessInput in, int nodeId) throws IOExcept firstChildId = getInt(FIRST_CHILD_ID_OFFSET); } + // output.writeInt(node.dimensionId); + // output.writeLong(node.dimensionValue); + // output.writeInt(node.startDocId); + // output.writeInt(node.endDocId); + // output.writeInt(node.aggregatedDocId); + // output.writeByte(node.nodeType); + // output.writeInt(firstChildId); + // output.writeInt(lastChildId); + private int getInt(int fieldOffset) throws IOException { return in.readInt(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java index 21041841cf789..a1eae64fa5f56 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java @@ -12,6 +12,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.RandomAccessInput; import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeDataWriter; import java.io.IOException; @@ -34,14 +35,17 @@ public StarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOExc logger.error("Invalid magic marker"); throw new IOException("Invalid magic marker"); } - int version = data.readVInt(); + int version = data.readInt(); if (VERSION != version) { logger.error("Invalid star tree version"); throw new IOException("Invalid version"); } - numNodes = data.readVInt(); // num nodes + numNodes = data.readInt(); // num nodes - RandomAccessInput in = data.randomAccessSlice(starTreeMetadata.getDataStartFilePointer(), starTreeMetadata.getDataLength()); + RandomAccessInput in = data.randomAccessSlice( + StarTreeDataWriter.computeStarTreeDataHeaderByteSize(), + starTreeMetadata.getDataLength() - StarTreeDataWriter.computeStarTreeDataHeaderByteSize() + ); root = new FixedLengthStarTreeNode(in, 0); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java index c76f1e0bbf8e6..bf1f1d52b4eee 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java @@ -49,7 +49,7 @@ public SequentialDocValuesIterator(DocIdSetIterator docIdSetIterator) { } /** - * Constructs a new SequentialDocValuesIterator instance with the given SortedNumericDocValues. + * Constructs a new SequentialDocValuesIterator instance with an empty sorted numeric. * */ public SequentialDocValuesIterator() { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java index 89148f3c20ab5..a637064d8b3e1 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java @@ -62,7 +62,7 @@ public static long writeStarTree(IndexOutput indexOutput, TreeNode rootNode, int * * @return the byte size of the star-tree data header */ - private static int computeStarTreeDataHeaderByteSize() { + public static int computeStarTreeDataHeaderByteSize() { // Magic marker (8), version (4) int headerSizeInBytes = 12; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java index a21939798b5e4..994793914e079 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java @@ -84,7 +84,7 @@ private static void writeMetaHeader( String starTreeFieldName ) throws IOException { // magic marker for sanity - metaOut.writeVLong(MAGIC_MARKER); + metaOut.writeLong(MAGIC_MARKER); // version metaOut.writeVInt(VERSION); @@ -123,8 +123,7 @@ private static void writeMeta( // dimensions for (Dimension dimension : starTreeField.getDimensionsOrder()) { - int dimensionFieldNumber = writeState.fieldInfos.fieldInfo(dimension.getField()).getFieldNumber(); - metaOut.writeVInt(dimensionFieldNumber); + metaOut.writeString(dimension.getField()); } // number of metrics @@ -132,10 +131,8 @@ private static void writeMeta( // metric - metric stat pair for (MetricAggregatorInfo metricAggregatorInfo : metricAggregatorInfos) { - String metricName = metricAggregatorInfo.getField(); - int metricFieldNumber = writeState.fieldInfos.fieldInfo(metricName).getFieldNumber(); + metaOut.writeString(metricAggregatorInfo.getField()); int metricStatOrdinal = metricAggregatorInfo.getMetricStat().getMetricOrdinal(); - metaOut.writeVInt(metricFieldNumber); metaOut.writeVInt(metricStatOrdinal); } @@ -150,8 +147,7 @@ private static void writeMeta( // skip star node creations for (String dimension : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) { - int dimensionFieldNumber = writeState.fieldInfos.fieldInfo(dimension).getFieldNumber(); - metaOut.writeVInt(dimensionFieldNumber); + metaOut.writeString(dimension); } // star tree build-mode diff --git a/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java b/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java index e7ec9f7fd107d..b01d239a99671 100644 --- a/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java +++ b/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java @@ -80,7 +80,7 @@ protected Codec getCodec() { final Logger testLogger = LogManager.getLogger(StarTreeDocValuesFormatTests.class); try { - createMapperService(getExpandedMapping("status", "size")); + createMapperService(getExpandedMapping()); } catch (IOException e) { throw new RuntimeException(e); } @@ -144,7 +144,7 @@ public void testStarTreeDocValues() throws IOException { directory.close(); } - private XContentBuilder getExpandedMapping(String dim, String metric) throws IOException { + private XContentBuilder getExpandedMapping() throws IOException { return topMapping(b -> { b.startObject("composite"); b.startObject("startree"); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index 76a7875919a8b..b9a5a97cf504d 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -8,6 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.index.DocValues; @@ -15,6 +16,7 @@ import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; @@ -24,11 +26,14 @@ import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.Version; import org.opensearch.common.settings.Settings; +import org.opensearch.index.codec.composite.Composite99DocValuesFormat; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.compositeindex.CompositeIndexConstants; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -36,6 +41,7 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; import org.opensearch.index.mapper.ContentPath; @@ -61,8 +67,8 @@ import java.util.Queue; import java.util.Set; import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; -import static org.opensearch.index.compositeindex.datacube.startree.builder.BaseStarTreeBuilder.NUM_SEGMENT_DOCS; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -76,11 +82,15 @@ public abstract class AbstractStarTreeBuilderTests extends OpenSearchTestCase { protected StarTreeField compositeField; protected Map fieldProducerMap; protected SegmentWriteState writeState; - private BaseStarTreeBuilder builder; + protected BaseStarTreeBuilder builder; + protected IndexOutput dataOut; + protected IndexOutput metaOut; + protected DocValuesConsumer docValuesConsumer; @Before public void setup() throws IOException { fields = List.of("field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8", "field9", "field10"); + docValuesConsumer = mock(DocValuesConsumer.class); dimensionsOrder = List.of( new NumericDimension("field1"), @@ -130,6 +140,20 @@ public void setup() throws IOException { } writeState = getWriteState(5); + String dataFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + dataOut = writeState.directory.createOutput(dataFileName, writeState.context); + + String metaFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + metaOut = writeState.directory.createOutput(metaFileName, writeState.context); + mapperService = mock(MapperService.class); DocumentMapper documentMapper = mock(DocumentMapper.class); when(mapperService.documentMapper()).thenReturn(documentMapper); @@ -665,7 +689,7 @@ public void test_build_halfFloatMetrics() throws IOException { dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); @@ -718,7 +742,7 @@ public void test_build_floatMetrics() throws IOException { dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); @@ -772,7 +796,7 @@ public void test_build_longMetrics() throws IOException { dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); @@ -821,7 +845,7 @@ public void test_build() throws IOException { dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); @@ -944,13 +968,13 @@ public void test_build_starTreeDataset() throws IOException { dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); Iterator expectedStarTreeDocumentIterator = expectedStarTreeDocuments(); Iterator resultStarTreeDocumentIterator = resultStarTreeDocuments.iterator(); Map> dimValueToDocIdMap = new HashMap<>(); - builder.rootNode.isStarNode = true; + builder.rootNode.nodeType = StarTreeNodeType.STAR.getValue(); traverseStarTree(builder.rootNode, dimValueToDocIdMap, true); Map> expectedDimToValueMap = getExpectedDimToValueMap(); @@ -1055,7 +1079,7 @@ public void testFlushFlow() throws IOException { SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(6), mapperService); SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimDvs, @@ -1126,7 +1150,7 @@ public void testFlushFlowBuild() throws IOException { DocValuesProducer d2vp = getDocValuesProducer(d2sndv); DocValuesProducer m1vp = getDocValuesProducer(m1sndv); Map fieldProducerMap = Map.of("field1", d1vp, "field3", d2vp, "field2", m1vp); - builder.build(fieldProducerMap); + builder.build(fieldProducerMap, new AtomicInteger(), docValuesConsumer); /** * Asserting following dim / metrics [ dim1, dim2 / Sum [ metric] ] [0, 0] | [0.0] @@ -1209,7 +1233,7 @@ public void testMergeFlowWithSum() throws IOException { sf, "6" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(6), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Sum [ metric] ] @@ -1259,7 +1283,7 @@ public void testMergeFlowWithCount() throws IOException { sf, "6" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(6), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1298,7 +1322,7 @@ private StarTreeValues getStarTreeValues( null, dimDocIdSetIterators, metricDocIdSetIterators, - Map.of("numSegmentDocs", number) + Map.of(CompositeIndexConstants.SEGMENT_DOCS_COUNT, number) ); return starTreeValues; } @@ -1336,7 +1360,7 @@ public void testMergeFlowWithDifferentDocsFromSegments() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1396,7 +1420,7 @@ public void testMergeFlowWithMissingDocs() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1456,7 +1480,7 @@ public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1517,7 +1541,7 @@ public void testMergeFlowWithDocsMissingAtTheEnd() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1569,7 +1593,7 @@ public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { sf, "0" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(0), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(0), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1664,8 +1688,8 @@ public void testMergeFlowWithDuplicateDimensionValues() throws IOException { metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + builder.build(List.of(starTreeValues, starTreeValues2), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); assertEquals(401, starTreeDocuments.size()); int count = 0; @@ -1774,8 +1798,8 @@ public void testMergeFlowWithMaxLeafDocs() throws IOException { sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + builder.build(List.of(starTreeValues, starTreeValues2), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); /** 635 docs get generated @@ -1892,8 +1916,8 @@ public void testMergeFlowWithDuplicateDimensionValueWithMaxLeafDocs() throws IOE metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + builder.build(List.of(starTreeValues, starTreeValues2), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); assertEquals(401, starTreeDocuments.size()); builder.close(); @@ -1991,8 +2015,8 @@ public void testMergeFlowWithMaxLeafDocsAndStarTreeNodesAssertion() throws IOExc metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + builder.build(List.of(starTreeValues, starTreeValues2), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); Map> dimValueToDocIdMap = new HashMap<>(); traverseStarTree(builder.rootNode, dimValueToDocIdMap, true); @@ -2044,14 +2068,14 @@ private void traverseStarTree(TreeNode root, Map> di // store aggregated document of the node int docId = starTreeNode.aggregatedDocId; Map map = dimValueToDocIdMap.getOrDefault(dimensionId, new HashMap<>()); - if (starTreeNode.isStarNode) { + if (starTreeNode.nodeType == StarTreeNodeType.STAR.getValue()) { map.put(Long.MAX_VALUE, docId); } else { map.put(starTreeNode.dimensionValue, docId); } dimValueToDocIdMap.put(dimensionId, map); - if (starTreeNode.children != null && (!traverStarNodes || starTreeNode.isStarNode)) { + if (starTreeNode.children != null && (!traverStarNodes || (starTreeNode.nodeType == StarTreeNodeType.STAR.getValue()))) { Iterator childrenIterator = starTreeNode.children.values().iterator(); while (childrenIterator.hasNext()) { TreeNode childNode = childrenIterator.next(); @@ -2171,7 +2195,7 @@ public void testMergeFlow() throws IOException { } Map getAttributes(int numSegmentDocs) { - return Map.of(String.valueOf(NUM_SEGMENT_DOCS), String.valueOf(numSegmentDocs)); + return Map.of(CompositeIndexConstants.SEGMENT_DOCS_COUNT, String.valueOf(numSegmentDocs)); } private static StarTreeField getStarTreeField(MetricStat count) { @@ -2246,6 +2270,8 @@ public void tearDown() throws Exception { if (builder != null) { builder.close(); } + metaOut.close(); + dataOut.close(); directory.close(); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java index 5667ca428f648..315f8cb94b6e5 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java @@ -8,1080 +8,20 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; -import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.codecs.lucene99.Lucene99Codec; -import org.apache.lucene.index.DocValuesType; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.index.VectorEncoding; -import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.sandbox.document.HalfFloatPoint; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.InfoStream; -import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.util.Version; -import org.opensearch.common.settings.Settings; -import org.opensearch.index.codec.composite.Composite99DocValuesFormat; -import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; -import org.opensearch.index.compositeindex.datacube.Dimension; -import org.opensearch.index.compositeindex.datacube.Metric; -import org.opensearch.index.compositeindex.datacube.MetricStat; -import org.opensearch.index.compositeindex.datacube.NumericDimension; -import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; -import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; -import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; -import org.opensearch.index.mapper.ContentPath; -import org.opensearch.index.mapper.DocumentMapper; -import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.MapperService; -import org.opensearch.index.mapper.MappingLookup; -import org.opensearch.index.mapper.NumberFieldMapper; -import org.opensearch.test.OpenSearchTestCase; -import org.junit.Before; import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicInteger; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class OnHeapStarTreeBuilderTests extends OpenSearchTestCase { - - private OnHeapStarTreeBuilder builder; - private MapperService mapperService; - private List dimensionsOrder; - private List fields = List.of(); - private List metrics; - private Directory directory; - private FieldInfo[] fieldsInfo; - private StarTreeField compositeField; - private Map fieldProducerMap; - private SegmentWriteState writeState; - private IndexOutput dataOut; - private IndexOutput metaOut; - private DocValuesConsumer docValuesConsumer; - - @Before - public void setup() throws IOException { - fields = List.of("field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8", "field9", "field10"); - - dimensionsOrder = List.of( - new NumericDimension("field1"), - new NumericDimension("field3"), - new NumericDimension("field5"), - new NumericDimension("field8") - ); - metrics = List.of( - new Metric("field2", List.of(MetricStat.SUM)), - new Metric("field4", List.of(MetricStat.SUM)), - new Metric("field6", List.of(MetricStat.COUNT)), - new Metric("field9", List.of(MetricStat.MIN)), - new Metric("field10", List.of(MetricStat.MAX)) - ); - - DocValuesProducer docValuesProducer = mock(DocValuesProducer.class); - docValuesConsumer = mock(DocValuesConsumer.class); - - compositeField = new StarTreeField( - "test", - dimensionsOrder, - metrics, - new StarTreeFieldConfiguration(1, Set.of("field8"), StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP) - ); - directory = newFSDirectory(createTempDir()); - SegmentInfo segmentInfo = new SegmentInfo( - directory, - Version.LATEST, - Version.LUCENE_9_11_0, - "test_segment", - 6, - false, - false, - new Lucene99Codec(), - new HashMap<>(), - UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), - new HashMap<>(), - null - ); - - fieldsInfo = new FieldInfo[fields.size()]; - fieldProducerMap = new HashMap<>(); - for (int i = 0; i < fieldsInfo.length; i++) { - fieldsInfo[i] = new FieldInfo( - fields.get(i), - i, - false, - false, - true, - IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, - DocValuesType.SORTED_NUMERIC, - -1, - Collections.emptyMap(), - 0, - 0, - 0, - 0, - VectorEncoding.FLOAT32, - VectorSimilarityFunction.EUCLIDEAN, - false, - false - ); - fieldProducerMap.put(fields.get(i), docValuesProducer); - } - FieldInfos fieldInfos = new FieldInfos(fieldsInfo); - writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); - - String dataFileName = IndexFileNames.segmentFileName( - writeState.segmentInfo.name, - writeState.segmentSuffix, - Composite99DocValuesFormat.DATA_EXTENSION - ); - dataOut = writeState.directory.createOutput(dataFileName, writeState.context); - - String metaFileName = IndexFileNames.segmentFileName( - writeState.segmentInfo.name, - writeState.segmentSuffix, - Composite99DocValuesFormat.META_EXTENSION - ); - metaOut = writeState.directory.createOutput(metaFileName, writeState.context); - - mapperService = mock(MapperService.class); - DocumentMapper documentMapper = mock(DocumentMapper.class); - when(mapperService.documentMapper()).thenReturn(documentMapper); - Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); - NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("field2", NumberFieldMapper.NumberType.DOUBLE, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper2 = new NumberFieldMapper.Builder("field4", NumberFieldMapper.NumberType.DOUBLE, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper3 = new NumberFieldMapper.Builder("field6", NumberFieldMapper.NumberType.DOUBLE, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper4 = new NumberFieldMapper.Builder("field9", NumberFieldMapper.NumberType.DOUBLE, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper5 = new NumberFieldMapper.Builder("field10", NumberFieldMapper.NumberType.DOUBLE, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - MappingLookup fieldMappers = new MappingLookup( - Set.of(numberFieldMapper1, numberFieldMapper2, numberFieldMapper3, numberFieldMapper4, numberFieldMapper5), - Collections.emptyList(), - Collections.emptyList(), - 0, - null - ); - when(documentMapper.mappers()).thenReturn(fieldMappers); - builder = new OnHeapStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); - } - - public void test_sortAndAggregateStarTreeDocuments() throws IOException { - - int noOfStarTreeDocuments = 5; - StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - - starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 12.0, 10.0, randomDouble(), 8.0, 20.0 }); - starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 12.0, 10.0 }); - starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 14.0, 12.0, randomDouble(), 6.0, 24.0 }); - starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 }); - starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, 16.0, randomDouble(), 8.0, 13.0 }); - - List inorderStarTreeDocuments = List.of( - new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 }), - new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0 }) - ); - Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); - - StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - for (int i = 0; i < noOfStarTreeDocuments; i++) { - Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; - for (int j = 0; j < metrics.length; j++) { - metrics[j] = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[j]); - } - segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); - } - - Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( - segmentStarTreeDocuments, - false - ); - int numOfAggregatedDocuments = 0; - while (segmentStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { - StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); - StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); - for (int dim = 0; dim < 4; dim++) { - assertEquals(expectedStarTreeDocument.dimensions[dim], resultStarTreeDocument.dimensions[dim]); - } - - for (int met = 0; met < 5; met++) { - assertEquals(expectedStarTreeDocument.metrics[met], resultStarTreeDocument.metrics[met]); - } - numOfAggregatedDocuments++; - } - assertEquals(inorderStarTreeDocuments.size(), numOfAggregatedDocuments); - - } - - public void test_sortAndAggregateStarTreeDocuments_idempotentMetric() throws IOException { - - int noOfStarTreeDocuments = 5; - StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - - starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 12.0, 10.0, randomDouble(), 8.0, 20.0 }); - starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 8.0, 24.0 }); - starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 14.0, 12.0, randomDouble(), 8.0, 20.0 }); - starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 }); - starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, null, randomDouble(), 8.0, 20.0 }); - - StarTreeDocument expectedStarTreeDocument = new StarTreeDocument( - new Long[] { 2L, 4L, 3L, 4L }, - new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 } - ); - StarTreeDocument expectedStarTreeDocumentWithIdempotentMetric = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, 1L }, - new Object[] { 35.0, 18.0, 3L, 8.0, 24.0 } - ); - - StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - for (int i = 0; i < noOfStarTreeDocuments; i++) { - Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; - for (int j = 0; j < metrics.length; j++) { - metrics[j] = starTreeDocuments[i].metrics[j] != null - ? NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[j]) - : null; - } - segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); - } - - Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( - segmentStarTreeDocuments, - false - ); - - StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); - for (int dim = 0; dim < 4; dim++) { - assertEquals(expectedStarTreeDocument.dimensions[dim], resultStarTreeDocument.dimensions[dim]); - } - - for (int met = 0; met < 5; met++) { - assertEquals(expectedStarTreeDocument.metrics[met], resultStarTreeDocument.metrics[met]); - } - - resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); - for (int dim = 0; dim < 4; dim++) { - assertEquals(expectedStarTreeDocumentWithIdempotentMetric.dimensions[dim], resultStarTreeDocument.dimensions[dim]); - } - - for (int met = 0; met < 5; met++) { - assertEquals(expectedStarTreeDocumentWithIdempotentMetric.metrics[met], resultStarTreeDocument.metrics[met]); - } - - } - - public void test_sortAndAggregateStarTreeDocument_longMaxAndLongMinDimensions() throws IOException { - - int noOfStarTreeDocuments = 5; - StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - - starTreeDocuments[0] = new StarTreeDocument( - new Long[] { Long.MIN_VALUE, 4L, 3L, 4L }, - new Double[] { 12.0, 10.0, randomDouble(), 8.0, 20.0 } - ); - starTreeDocuments[1] = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, Long.MAX_VALUE }, - new Double[] { 10.0, 6.0, randomDouble(), 12.0, 10.0 } - ); - starTreeDocuments[2] = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, Long.MAX_VALUE }, - new Double[] { 14.0, 12.0, randomDouble(), 6.0, 24.0 } - ); - starTreeDocuments[3] = new StarTreeDocument( - new Long[] { Long.MIN_VALUE, 4L, 3L, 4L }, - new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 } - ); - starTreeDocuments[4] = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, Long.MAX_VALUE }, - new Double[] { 11.0, 16.0, randomDouble(), 8.0, 13.0 } - ); - - List inorderStarTreeDocuments = List.of( - new StarTreeDocument(new Long[] { Long.MIN_VALUE, 4L, 3L, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 }), - new StarTreeDocument(new Long[] { 3L, 4L, 2L, Long.MAX_VALUE }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0 }) - ); - Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); - - StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - for (int i = 0; i < noOfStarTreeDocuments; i++) { - Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; - for (int j = 0; j < metrics.length; j++) { - metrics[j] = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[j]); - } - segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); - } - - Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( - segmentStarTreeDocuments, - false - ); - int numOfAggregatedDocuments = 0; - while (segmentStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { - StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); - StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); - - for (int dim = 0; dim < 4; dim++) { - assertEquals(expectedStarTreeDocument.dimensions[dim], resultStarTreeDocument.dimensions[dim]); - } - - for (int met = 0; met < 5; met++) { - assertEquals(expectedStarTreeDocument.metrics[met], resultStarTreeDocument.metrics[met]); - } - - numOfAggregatedDocuments++; - } - - assertEquals(inorderStarTreeDocuments.size(), numOfAggregatedDocuments); - - } - - public void test_sortAndAggregateStarTreeDocument_DoubleMaxAndDoubleMinMetrics() throws IOException { - - int noOfStarTreeDocuments = 5; - StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - - starTreeDocuments[0] = new StarTreeDocument( - new Long[] { 2L, 4L, 3L, 4L }, - new Double[] { Double.MAX_VALUE, 10.0, randomDouble(), 8.0, 20.0 } - ); - starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 12.0, 10.0 }); - starTreeDocuments[2] = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, 1L }, - new Double[] { 14.0, Double.MIN_VALUE, randomDouble(), 6.0, 24.0 } - ); - starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 }); - starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, 16.0, randomDouble(), 8.0, 13.0 }); - - List inorderStarTreeDocuments = List.of( - new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Object[] { Double.MAX_VALUE + 9, 14.0, 2L, 8.0, 20.0 }), - new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Object[] { 35.0, Double.MIN_VALUE + 22, 3L, 6.0, 24.0 }) - ); - Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); - - StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - for (int i = 0; i < noOfStarTreeDocuments; i++) { - Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; - for (int j = 0; j < metrics.length; j++) { - metrics[j] = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[j]); - } - segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); - } - - Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( - segmentStarTreeDocuments, - false - ); - int numOfAggregatedDocuments = 0; - while (segmentStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { - StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); - StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); - - for (int dim = 0; dim < 4; dim++) { - assertEquals(expectedStarTreeDocument.dimensions[dim], resultStarTreeDocument.dimensions[dim]); - } - - for (int met = 0; met < 5; met++) { - assertEquals(expectedStarTreeDocument.metrics[met], resultStarTreeDocument.metrics[met]); - } - - numOfAggregatedDocuments++; - } - - assertEquals(inorderStarTreeDocuments.size(), numOfAggregatedDocuments); - - } - - public void test_build_halfFloatMetrics() throws IOException { - - mapperService = mock(MapperService.class); - DocumentMapper documentMapper = mock(DocumentMapper.class); - when(mapperService.documentMapper()).thenReturn(documentMapper); - Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); - NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("field2", NumberFieldMapper.NumberType.HALF_FLOAT, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper2 = new NumberFieldMapper.Builder("field4", NumberFieldMapper.NumberType.HALF_FLOAT, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper3 = new NumberFieldMapper.Builder("field6", NumberFieldMapper.NumberType.HALF_FLOAT, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper4 = new NumberFieldMapper.Builder("field9", NumberFieldMapper.NumberType.HALF_FLOAT, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper5 = new NumberFieldMapper.Builder( - "field10", - NumberFieldMapper.NumberType.HALF_FLOAT, - false, - true - ).build(new Mapper.BuilderContext(settings, new ContentPath())); - MappingLookup fieldMappers = new MappingLookup( - Set.of(numberFieldMapper1, numberFieldMapper2, numberFieldMapper3, numberFieldMapper4, numberFieldMapper5), - Collections.emptyList(), - Collections.emptyList(), - 0, - null - ); - when(documentMapper.mappers()).thenReturn(fieldMappers); - builder = new OnHeapStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); - - int noOfStarTreeDocuments = 5; - StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - - starTreeDocuments[0] = new StarTreeDocument( - new Long[] { 2L, 4L, 3L, 4L }, - new HalfFloatPoint[] { - new HalfFloatPoint("hf1", 12), - new HalfFloatPoint("hf6", 10), - new HalfFloatPoint("field6", 10), - new HalfFloatPoint("field9", 8), - new HalfFloatPoint("field10", 20) } - ); - starTreeDocuments[1] = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, 1L }, - new HalfFloatPoint[] { - new HalfFloatPoint("hf2", 10), - new HalfFloatPoint("hf7", 6), - new HalfFloatPoint("field6", 10), - new HalfFloatPoint("field9", 12), - new HalfFloatPoint("field10", 10) } - ); - starTreeDocuments[2] = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, 1L }, - new HalfFloatPoint[] { - new HalfFloatPoint("hf3", 14), - new HalfFloatPoint("hf8", 12), - new HalfFloatPoint("field6", 10), - new HalfFloatPoint("field9", 6), - new HalfFloatPoint("field10", 24) } - ); - starTreeDocuments[3] = new StarTreeDocument( - new Long[] { 2L, 4L, 3L, 4L }, - new HalfFloatPoint[] { - new HalfFloatPoint("hf4", 9), - new HalfFloatPoint("hf9", 4), - new HalfFloatPoint("field6", 10), - new HalfFloatPoint("field9", 9), - new HalfFloatPoint("field10", 12) } - ); - starTreeDocuments[4] = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, 1L }, - new HalfFloatPoint[] { - new HalfFloatPoint("hf5", 11), - new HalfFloatPoint("hf10", 16), - new HalfFloatPoint("field6", 10), - new HalfFloatPoint("field9", 8), - new HalfFloatPoint("field10", 13) } - ); - - StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - for (int i = 0; i < noOfStarTreeDocuments; i++) { - Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; - for (int j = 0; j < metrics.length; j++) { - metrics[j] = (long) HalfFloatPoint.halfFloatToSortableShort( - ((HalfFloatPoint) starTreeDocuments[i].metrics[j]).numericValue().floatValue() - ); - } - segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); - } - - Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( - segmentStarTreeDocuments, - false - ); - builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); - List resultStarTreeDocuments = builder.getStarTreeDocuments(); - assertEquals(7, resultStarTreeDocuments.size()); - - Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); - assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); - } - - public void test_build_floatMetrics() throws IOException { - - mapperService = mock(MapperService.class); - DocumentMapper documentMapper = mock(DocumentMapper.class); - when(mapperService.documentMapper()).thenReturn(documentMapper); - Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); - NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("field2", NumberFieldMapper.NumberType.FLOAT, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper2 = new NumberFieldMapper.Builder("field4", NumberFieldMapper.NumberType.FLOAT, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper3 = new NumberFieldMapper.Builder("field6", NumberFieldMapper.NumberType.FLOAT, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper4 = new NumberFieldMapper.Builder("field9", NumberFieldMapper.NumberType.FLOAT, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper5 = new NumberFieldMapper.Builder("field10", NumberFieldMapper.NumberType.FLOAT, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - MappingLookup fieldMappers = new MappingLookup( - Set.of(numberFieldMapper1, numberFieldMapper2, numberFieldMapper3, numberFieldMapper4, numberFieldMapper5), - Collections.emptyList(), - Collections.emptyList(), - 0, - null - ); - when(documentMapper.mappers()).thenReturn(fieldMappers); - builder = new OnHeapStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); - - int noOfStarTreeDocuments = 5; - StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - - starTreeDocuments[0] = new StarTreeDocument( - new Long[] { 2L, 4L, 3L, 4L }, - new Float[] { 12.0F, 10.0F, randomFloat(), 8.0F, 20.0F } - ); - starTreeDocuments[1] = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, 1L }, - new Float[] { 10.0F, 6.0F, randomFloat(), 12.0F, 10.0F } - ); - starTreeDocuments[2] = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, 1L }, - new Float[] { 14.0F, 12.0F, randomFloat(), 6.0F, 24.0F } - ); - starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Float[] { 9.0F, 4.0F, randomFloat(), 9.0F, 12.0F }); - starTreeDocuments[4] = new StarTreeDocument( - new Long[] { 3L, 4L, 2L, 1L }, - new Float[] { 11.0F, 16.0F, randomFloat(), 8.0F, 13.0F } - ); - - StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - for (int i = 0; i < noOfStarTreeDocuments; i++) { - Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; - for (int j = 0; j < metrics.length; j++) { - metrics[j] = (long) NumericUtils.floatToSortableInt((Float) starTreeDocuments[i].metrics[j]); - } - segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); - } - - Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( - segmentStarTreeDocuments, - false - ); - builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); - - List resultStarTreeDocuments = builder.getStarTreeDocuments(); - assertEquals(7, resultStarTreeDocuments.size()); - - Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); - assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); - } - - public void test_build_longMetrics() throws IOException { - - mapperService = mock(MapperService.class); - DocumentMapper documentMapper = mock(DocumentMapper.class); - when(mapperService.documentMapper()).thenReturn(documentMapper); - Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); - NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("field2", NumberFieldMapper.NumberType.LONG, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper2 = new NumberFieldMapper.Builder("field4", NumberFieldMapper.NumberType.LONG, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper3 = new NumberFieldMapper.Builder("field6", NumberFieldMapper.NumberType.LONG, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper4 = new NumberFieldMapper.Builder("field9", NumberFieldMapper.NumberType.LONG, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - NumberFieldMapper numberFieldMapper5 = new NumberFieldMapper.Builder("field10", NumberFieldMapper.NumberType.LONG, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - MappingLookup fieldMappers = new MappingLookup( - Set.of(numberFieldMapper1, numberFieldMapper2, numberFieldMapper3, numberFieldMapper4, numberFieldMapper5), - Collections.emptyList(), - Collections.emptyList(), - 0, - null - ); - when(documentMapper.mappers()).thenReturn(fieldMappers); - builder = new OnHeapStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); - - int noOfStarTreeDocuments = 5; - StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - - starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Long[] { 12L, 10L, randomLong(), 8L, 20L }); - starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Long[] { 10L, 6L, randomLong(), 12L, 10L }); - starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Long[] { 14L, 12L, randomLong(), 6L, 24L }); - starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Long[] { 9L, 4L, randomLong(), 9L, 12L }); - starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Long[] { 11L, 16L, randomLong(), 8L, 13L }); - - StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - for (int i = 0; i < noOfStarTreeDocuments; i++) { - Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; - for (int j = 0; j < metrics.length; j++) { - metrics[j] = (Long) starTreeDocuments[i].metrics[j]; - } - segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); - } - - Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( - segmentStarTreeDocuments, - false - ); - builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); - - List resultStarTreeDocuments = builder.getStarTreeDocuments(); - assertEquals(7, resultStarTreeDocuments.size()); - - Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); - assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); - } - - private static Iterator getExpectedStarTreeDocumentIterator() { - List expectedStarTreeDocuments = List.of( - new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 }), - new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0 }), - new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0 }), - new StarTreeDocument(new Long[] { null, 4L, 3L, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 }), - new StarTreeDocument(new Long[] { null, 4L, null, 1L }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0 }), - new StarTreeDocument(new Long[] { null, 4L, null, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 }), - new StarTreeDocument(new Long[] { null, 4L, null, null }, new Object[] { 56.0, 48.0, 5L, 6.0, 24.0 }) - ); - return expectedStarTreeDocuments.iterator(); - } - - public void test_build() throws IOException { - - int noOfStarTreeDocuments = 5; - StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - - starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 12.0, 10.0, randomDouble(), 8.0, 20.0 }); - starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 12.0, 10.0 }); - starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 14.0, 12.0, randomDouble(), 6.0, 24.0 }); - starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 }); - starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, 16.0, randomDouble(), 8.0, 13.0 }); - - StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - for (int i = 0; i < noOfStarTreeDocuments; i++) { - Long[] metrics = new Long[starTreeDocuments[0].metrics.length]; - for (int j = 0; j < metrics.length; j++) { - metrics[j] = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[j]); - } - segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, metrics); - } - - Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( - segmentStarTreeDocuments, - false - ); - builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); - - List resultStarTreeDocuments = builder.getStarTreeDocuments(); - assertEquals(7, resultStarTreeDocuments.size()); - - Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); - assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); - } - - private void assertStarTreeDocuments( - List resultStarTreeDocuments, - Iterator expectedStarTreeDocumentIterator - ) { - Iterator resultStarTreeDocumentIterator = resultStarTreeDocuments.iterator(); - while (resultStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { - StarTreeDocument resultStarTreeDocument = resultStarTreeDocumentIterator.next(); - StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); - - assertEquals(expectedStarTreeDocument.dimensions[0], resultStarTreeDocument.dimensions[0]); - assertEquals(expectedStarTreeDocument.dimensions[1], resultStarTreeDocument.dimensions[1]); - assertEquals(expectedStarTreeDocument.dimensions[2], resultStarTreeDocument.dimensions[2]); - assertEquals(expectedStarTreeDocument.dimensions[3], resultStarTreeDocument.dimensions[3]); - assertEquals(expectedStarTreeDocument.metrics[0], resultStarTreeDocument.metrics[0]); - assertEquals(expectedStarTreeDocument.metrics[1], resultStarTreeDocument.metrics[1]); - assertEquals(expectedStarTreeDocument.metrics[2], resultStarTreeDocument.metrics[2]); - assertEquals(expectedStarTreeDocument.metrics[3], resultStarTreeDocument.metrics[3]); - assertEquals(expectedStarTreeDocument.metrics[4], resultStarTreeDocument.metrics[4]); - } - } - - public void test_build_starTreeDataset() throws IOException { - - fields = List.of("fieldC", "fieldB", "fieldL", "fieldI"); - - dimensionsOrder = List.of(new NumericDimension("fieldC"), new NumericDimension("fieldB"), new NumericDimension("fieldL")); - metrics = List.of(new Metric("fieldI", List.of(MetricStat.SUM))); - - DocValuesProducer docValuesProducer = mock(DocValuesProducer.class); - - compositeField = new StarTreeField( - "test", - dimensionsOrder, - metrics, - new StarTreeFieldConfiguration(1, Set.of(), StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP) - ); - SegmentInfo segmentInfo = new SegmentInfo( - directory, - Version.LATEST, - Version.LUCENE_9_11_0, - "test_segment", - 7, - false, - false, - new Lucene99Codec(), - new HashMap<>(), - UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), - new HashMap<>(), - null - ); - - fieldsInfo = new FieldInfo[fields.size()]; - fieldProducerMap = new HashMap<>(); - for (int i = 0; i < fieldsInfo.length; i++) { - fieldsInfo[i] = new FieldInfo( - fields.get(i), - i, - false, - false, - true, - IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, - DocValuesType.SORTED_NUMERIC, - -1, - Collections.emptyMap(), - 0, - 0, - 0, - 0, - VectorEncoding.FLOAT32, - VectorSimilarityFunction.EUCLIDEAN, - false, - false - ); - fieldProducerMap.put(fields.get(i), docValuesProducer); - } - FieldInfos fieldInfos = new FieldInfos(fieldsInfo); - writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); - - mapperService = mock(MapperService.class); - DocumentMapper documentMapper = mock(DocumentMapper.class); - when(mapperService.documentMapper()).thenReturn(documentMapper); - Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); - NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("fieldI", NumberFieldMapper.NumberType.DOUBLE, false, true) - .build(new Mapper.BuilderContext(settings, new ContentPath())); - MappingLookup fieldMappers = new MappingLookup( - Set.of(numberFieldMapper1), - Collections.emptyList(), - Collections.emptyList(), - 0, - null - ); - when(documentMapper.mappers()).thenReturn(fieldMappers); - builder = new OnHeapStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); - - int noOfStarTreeDocuments = 7; - StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - starTreeDocuments[0] = new StarTreeDocument(new Long[] { 1L, 11L, 21L }, new Double[] { 400.0 }); - starTreeDocuments[1] = new StarTreeDocument(new Long[] { 1L, 12L, 22L }, new Double[] { 200.0 }); - starTreeDocuments[2] = new StarTreeDocument(new Long[] { 2L, 13L, 23L }, new Double[] { 300.0 }); - starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 13L, 21L }, new Double[] { 100.0 }); - starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 11L, 21L }, new Double[] { 600.0 }); - starTreeDocuments[5] = new StarTreeDocument(new Long[] { 3L, 12L, 23L }, new Double[] { 200.0 }); - starTreeDocuments[6] = new StarTreeDocument(new Long[] { 3L, 12L, 21L }, new Double[] { 400.0 }); - - StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; - for (int i = 0; i < noOfStarTreeDocuments; i++) { - long metric1 = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[0]); - segmentStarTreeDocuments[i] = new StarTreeDocument(starTreeDocuments[i].dimensions, new Long[] { metric1 }); - } - - Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateStarTreeDocuments( - segmentStarTreeDocuments, - false - ); - builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); - - List resultStarTreeDocuments = builder.getStarTreeDocuments(); - List expectedStarTreeDocuments = List.of( - new StarTreeDocument(new Long[] { 1L, 11L, 21L }, new Object[] { 400.0 }), - new StarTreeDocument(new Long[] { 1L, 12L, 22L }, new Object[] { 200.0 }), - new StarTreeDocument(new Long[] { 2L, 13L, 21L }, new Object[] { 100.0 }), - new StarTreeDocument(new Long[] { 2L, 13L, 23L }, new Object[] { 300.0 }), - new StarTreeDocument(new Long[] { 3L, 11L, 21L }, new Object[] { 600.0 }), - new StarTreeDocument(new Long[] { 3L, 12L, 21L }, new Object[] { 400.0 }), - new StarTreeDocument(new Long[] { 3L, 12L, 23L }, new Object[] { 200.0 }), - new StarTreeDocument(new Long[] { null, 11L, 21L }, new Object[] { 1000.0 }), - new StarTreeDocument(new Long[] { null, 12L, 21L }, new Object[] { 400.0 }), - new StarTreeDocument(new Long[] { null, 12L, 22L }, new Object[] { 200.0 }), - new StarTreeDocument(new Long[] { null, 12L, 23L }, new Object[] { 200.0 }), - new StarTreeDocument(new Long[] { null, 13L, 21L }, new Object[] { 100.0 }), - new StarTreeDocument(new Long[] { null, 13L, 23L }, new Object[] { 300.0 }), - new StarTreeDocument(new Long[] { null, null, 21L }, new Object[] { 1500.0 }), - new StarTreeDocument(new Long[] { null, null, 22L }, new Object[] { 200.0 }), - new StarTreeDocument(new Long[] { null, null, 23L }, new Object[] { 500.0 }), - new StarTreeDocument(new Long[] { null, null, null }, new Object[] { 2200.0 }), - new StarTreeDocument(new Long[] { null, 12L, null }, new Object[] { 800.0 }), - new StarTreeDocument(new Long[] { null, 13L, null }, new Object[] { 400.0 }), - new StarTreeDocument(new Long[] { 1L, null, 21L }, new Object[] { 400.0 }), - new StarTreeDocument(new Long[] { 1L, null, 22L }, new Object[] { 200.0 }), - new StarTreeDocument(new Long[] { 1L, null, null }, new Object[] { 600.0 }), - new StarTreeDocument(new Long[] { 2L, 13L, null }, new Object[] { 400.0 }), - new StarTreeDocument(new Long[] { 3L, null, 21L }, new Object[] { 1000.0 }), - new StarTreeDocument(new Long[] { 3L, null, 23L }, new Object[] { 200.0 }), - new StarTreeDocument(new Long[] { 3L, null, null }, new Object[] { 1200.0 }), - new StarTreeDocument(new Long[] { 3L, 12L, null }, new Object[] { 600.0 }) - ); - - Iterator expectedStarTreeDocumentIterator = expectedStarTreeDocuments.iterator(); - Iterator resultStarTreeDocumentIterator = resultStarTreeDocuments.iterator(); - while (resultStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { - StarTreeDocument resultStarTreeDocument = resultStarTreeDocumentIterator.next(); - StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); - - assertEquals(expectedStarTreeDocument.dimensions[0], resultStarTreeDocument.dimensions[0]); - assertEquals(expectedStarTreeDocument.dimensions[1], resultStarTreeDocument.dimensions[1]); - assertEquals(expectedStarTreeDocument.dimensions[2], resultStarTreeDocument.dimensions[2]); - assertEquals(expectedStarTreeDocument.metrics[0], resultStarTreeDocument.metrics[0]); - } - - } - - public void testFlushFlow() throws IOException { - List dimList = List.of(0L, 1L, 3L, 4L, 5L); - List docsWithField = List.of(0, 1, 3, 4, 5); - List dimList2 = List.of(0L, 1L, 2L, 3L, 4L, 5L); - List docsWithField2 = List.of(0, 1, 2, 3, 4, 5); - - List metricsList = List.of( - getLongFromDouble(0.0), - getLongFromDouble(10.0), - getLongFromDouble(20.0), - getLongFromDouble(30.0), - getLongFromDouble(40.0), - getLongFromDouble(50.0) - ); - List metricsWithField = List.of(0, 1, 2, 3, 4, 5); - - Dimension d1 = new NumericDimension("field1"); - Dimension d2 = new NumericDimension("field3"); - Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); - Metric m2 = new Metric("field2", List.of(MetricStat.COUNT)); - List dims = List.of(d1, d2); - List metrics = List.of(m1, m2); - StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( - 1000, - new HashSet<>(), - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP - ); - StarTreeField sf = new StarTreeField("sf", dims, metrics, c); - SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); - SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); - SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); - SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); - - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); - SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; - Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( - dimDvs, - List.of(new SequentialDocValuesIterator(m1sndv), new SequentialDocValuesIterator(m2sndv)) - ); - /** - * Asserting following dim / metrics [ dim1, dim2 / Sum [metric], count [metric] ] - [0, 0] | [0.0, 1] - [1, 1] | [10.0, 1] - [3, 3] | [30.0, 1] - [4, 4] | [40.0, 1] - [5, 5] | [50.0, 1] - [null, 2] | [20.0, 1] - */ - int count = 0; - while (starTreeDocumentIterator.hasNext()) { - count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - assertEquals( - starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 1 * 10.0 : 20.0, - starTreeDocument.metrics[0] - ); - assertEquals(1L, starTreeDocument.metrics[1]); - } - assertEquals(6, count); - } - - public void testMergeFlow() throws IOException { - List dimList = List.of(0L, 1L, 3L, 4L, 5L); - List docsWithField = List.of(0, 1, 3, 4, 5); - List dimList2 = List.of(0L, 1L, 2L, 3L, 4L, 5L); - List docsWithField2 = List.of(0, 1, 2, 3, 4, 5); - - List metricsList = List.of( - getLongFromDouble(0.0), - getLongFromDouble(10.0), - getLongFromDouble(20.0), - getLongFromDouble(30.0), - getLongFromDouble(40.0), - getLongFromDouble(50.0) - ); - List metricsWithField = List.of(0, 1, 2, 3, 4, 5); - - Dimension d1 = new NumericDimension("field1"); - Dimension d2 = new NumericDimension("field3"); - Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); - List dims = List.of(d1, d2); - List metrics = List.of(m1); - StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( - 1000, - new HashSet<>(), - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP - ); - StarTreeField sf = new StarTreeField("sf", dims, metrics, c); - SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); - SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); - SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); - Map dimDocIdSetIterators = Map.of("field1", d1sndv, "field3", d2sndv); - Map metricDocIdSetIterators = Map.of("field2", m1sndv); - StarTreeValues starTreeValues = new StarTreeValues(sf, null, dimDocIdSetIterators, metricDocIdSetIterators, new HashMap<>()); - - SortedNumericDocValues f2d1sndv = getSortedNumericMock(dimList, docsWithField); - SortedNumericDocValues f2d2sndv = getSortedNumericMock(dimList2, docsWithField2); - SortedNumericDocValues f2m1sndv = getSortedNumericMock(metricsList, metricsWithField); - Map f2dimDocIdSetIterators = Map.of("field1", f2d1sndv, "field3", f2d2sndv); - Map f2metricDocIdSetIterators = Map.of("field2", f2m1sndv); - StarTreeValues starTreeValues2 = new StarTreeValues(sf, null, f2dimDocIdSetIterators, f2metricDocIdSetIterators, new HashMap<>()); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); - Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); - - /** - * Asserting following dim / metrics [ dim1, dim2 / Sum [ metric] ] - * [0, 0] | [0.0] - * [1, 1] | [20.0] - * [3, 3] | [60.0] - * [4, 4] | [80.0] - * [5, 5] | [100.0] - * [null, 2] | [40.0] - */ - int count = 0; - while (starTreeDocumentIterator.hasNext()) { - count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - assertEquals( - starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 2 * 10.0 : 40.0, - starTreeDocument.metrics[0] - ); - } - assertEquals(6, count); - } - - public void testMergeFlowWithCount() throws IOException { - List dimList = List.of(0L, 1L, 3L, 4L, 5L); - List docsWithField = List.of(0, 1, 3, 4, 5); - List dimList2 = List.of(0L, 1L, 2L, 3L, 4L, 5L); - List docsWithField2 = List.of(0, 1, 2, 3, 4, 5); - - List metricsList = List.of(0L, 1L, 2L, 3L, 4L, 5L); - List metricsWithField = List.of(0, 1, 2, 3, 4, 5); - - Dimension d1 = new NumericDimension("field1"); - Dimension d2 = new NumericDimension("field3"); - Metric m1 = new Metric("field2", List.of(MetricStat.COUNT)); - List dims = List.of(d1, d2); - List metrics = List.of(m1); - StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( - 1000, - new HashSet<>(), - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP - ); - StarTreeField sf = new StarTreeField("sf", dims, metrics, c); - SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); - SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); - SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); - Map dimDocIdSetIterators = Map.of("field1", d1sndv, "field3", d2sndv); - Map metricDocIdSetIterators = Map.of("field2", m1sndv); - StarTreeValues starTreeValues = new StarTreeValues(sf, null, dimDocIdSetIterators, metricDocIdSetIterators, new HashMap<>()); - - SortedNumericDocValues f2d1sndv = getSortedNumericMock(dimList, docsWithField); - SortedNumericDocValues f2d2sndv = getSortedNumericMock(dimList2, docsWithField2); - SortedNumericDocValues f2m1sndv = getSortedNumericMock(metricsList, metricsWithField); - Map f2dimDocIdSetIterators = Map.of("field1", f2d1sndv, "field3", f2d2sndv); - Map f2metricDocIdSetIterators = Map.of("field2", f2m1sndv); - StarTreeValues starTreeValues2 = new StarTreeValues(sf, null, f2dimDocIdSetIterators, f2metricDocIdSetIterators, new HashMap<>()); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); - Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); - /** - * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] - [0, 0] | [0] - [1, 1] | [2] - [3, 3] | [6] - [4, 4] | [8] - [5, 5] | [10] - [null, 2] | [4] - */ - int count = 0; - while (starTreeDocumentIterator.hasNext()) { - count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - assertEquals(starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 2 : 4, starTreeDocument.metrics[0]); - } - assertEquals(6, count); - } - - private Long getLongFromDouble(Double num) { - if (num == null) { - return null; - } - return NumericUtils.doubleToSortableLong(num); - } - - private SortedNumericDocValues getSortedNumericMock(List dimList, List docsWithField) { - return new SortedNumericDocValues() { - int index = -1; - - @Override - public long nextValue() throws IOException { - return dimList.get(index); - } - - @Override - public int docValueCount() { - return 0; - } - - @Override - public boolean advanceExact(int target) throws IOException { - return false; - } - - @Override - public int docID() { - return index; - } - - @Override - public int nextDoc() throws IOException { - if (index == docsWithField.size() - 1) { - return NO_MORE_DOCS; - } - index++; - return docsWithField.get(index); - } - - @Override - public int advance(int target) throws IOException { - return 0; - } - - @Override - public long cost() { - return 0; - } - }; - } +public class OnHeapStarTreeBuilderTests extends AbstractStarTreeBuilderTests { @Override - public void tearDown() throws Exception { - super.tearDown(); - dataOut.close(); - metaOut.close(); - directory.close(); + public BaseStarTreeBuilder getStarTreeBuilder( + StarTreeField starTreeField, + SegmentWriteState segmentWriteState, + MapperService mapperService + ) throws IOException { + return new OnHeapStarTreeBuilder(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java index cae4ad00c9693..c1a97f6cb26bd 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java @@ -120,9 +120,7 @@ public void test_starTreeMetadata() throws IOException { metrics = List.of( new Metric("field2", List.of(MetricStat.SUM)), new Metric("field4", List.of(MetricStat.SUM)), - new Metric("field6", List.of(MetricStat.COUNT)), - new Metric("field9", List.of(MetricStat.MIN)), - new Metric("field10", List.of(MetricStat.MAX)) + new Metric("field6", List.of(MetricStat.COUNT)) ); int maxLeafDocs = randomNonNegativeInt(); StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( @@ -159,7 +157,7 @@ public void test_starTreeMetadata() throws IOException { ); metaOut.close(); metaIn = directory.openInput("star-tree-metadata", IOContext.READONCE); - assertEquals(MAGIC_MARKER, metaIn.readVLong()); + assertEquals(MAGIC_MARKER, metaIn.readLong()); assertEquals(VERSION, metaIn.readVInt()); String compositeFieldName = metaIn.readString(); @@ -174,20 +172,13 @@ public void test_starTreeMetadata() throws IOException { assertNotNull(starTreeMetadata); for (int i = 0; i < dimensionsOrder.size(); i++) { - assertEquals( - writeState.fieldInfos.fieldInfo(dimensionsOrder.get(i).getField()).getFieldNumber(), - starTreeMetadata.getDimensionFieldNumbers().get(i), - 0 - ); + assertEquals(dimensionsOrder.get(i).getField(), starTreeMetadata.getDimensionFields().get(i)); } for (int i = 0; i < metricAggregatorInfos.size(); i++) { MetricEntry metricEntry = starTreeMetadata.getMetricEntries().get(i); - assertEquals( - metricAggregatorInfos.get(i).getField(), - writeState.fieldInfos.fieldInfo(metricEntry.getMetricFieldNumber()).getName() - ); - assertEquals(metricAggregatorInfos.get(i).getMetricStat(), MetricStat.fromMetricOrdinal(metricEntry.getMetricStatOrdinal())); + assertEquals(metricAggregatorInfos.get(i).getField(), metricEntry.getMetricFieldName()); + assertEquals(metricAggregatorInfos.get(i).getMetricStat(), metricEntry.getMetricStat()); } assertEquals(segmentDocumentCount, starTreeMetadata.getSegmentAggregatedDocCount(), 0); assertEquals(maxLeafDocs, starTreeMetadata.getMaxLeafDocs(), 0); @@ -196,8 +187,7 @@ public void test_starTreeMetadata() throws IOException { starTreeMetadata.getSkipStarNodeCreationInDims().size() ); for (String skipStarNodeCreationInDims : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) { - Integer skipStarNodeCreationInDimsFieldNumber = writeState.fieldInfos.fieldInfo(skipStarNodeCreationInDims).getFieldNumber(); - assertTrue(starTreeMetadata.getSkipStarNodeCreationInDims().contains(skipStarNodeCreationInDimsFieldNumber)); + assertTrue(starTreeMetadata.getSkipStarNodeCreationInDims().contains(skipStarNodeCreationInDims)); } assertEquals(starTreeFieldConfiguration.getBuildMode(), starTreeMetadata.getStarTreeBuildMode()); assertEquals(dataFileLength, starTreeMetadata.getDataLength()); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java index c511aeb226890..13ead0efd8c51 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java @@ -47,7 +47,7 @@ public void test_StarTreeNode() throws IOException { TreeNode root = generateSampleTree(levelOrderStarTreeNodeMap); long starTreeDataLength = StarTreeUtils.writeStarTree(dataOut, root, 7, "star-tree"); - //asserting on the actual length of the star tree data file + // asserting on the actual length of the star tree data file assertEquals(starTreeDataLength, 247); dataOut.close(); @@ -112,7 +112,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) root.endDocId = 100; root.childDimensionId = 1; root.aggregatedDocId = randomInt(); - root.nodeType = randomByte(); + root.nodeType = (byte) 0; root.children = new HashMap<>(); levelOrderStarTreeNode.put(root.dimensionValue, root); @@ -125,7 +125,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) dim1Node1.endDocId = 50; dim1Node1.childDimensionId = 2; dim1Node1.aggregatedDocId = randomInt(); - dim1Node1.nodeType = randomByte(); + root.nodeType = (byte) 0; dim1Node1.children = new HashMap<>(); TreeNode dim1Node2 = new TreeNode(); @@ -135,7 +135,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) dim1Node2.endDocId = 100; dim1Node2.childDimensionId = 2; dim1Node2.aggregatedDocId = randomInt(); - dim1Node2.nodeType = randomByte(); + root.nodeType = (byte) 0; dim1Node2.children = new HashMap<>(); root.children.put(1L, dim1Node1); @@ -152,7 +152,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) dim2Node1.endDocId = 25; dim2Node1.childDimensionId = -1; dim2Node1.aggregatedDocId = randomInt(); - dim2Node1.nodeType = randomByte(); + root.nodeType = (byte) 0; dim2Node1.children = null; TreeNode dim2Node2 = new TreeNode(); @@ -162,7 +162,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) dim2Node2.endDocId = 50; dim2Node2.childDimensionId = -1; dim2Node2.aggregatedDocId = randomInt(); - dim2Node2.nodeType = randomByte(); + root.nodeType = (byte) 0; dim2Node2.children = null; TreeNode dim2Node3 = new TreeNode(); @@ -172,7 +172,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) dim2Node3.endDocId = 75; dim2Node3.childDimensionId = -1; dim2Node3.aggregatedDocId = randomInt(); - dim2Node3.nodeType = randomByte(); + root.nodeType = (byte) 0; dim2Node3.children = null; TreeNode dim2Node4 = new TreeNode(); @@ -182,7 +182,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) dim2Node4.endDocId = 100; dim2Node4.childDimensionId = -1; dim2Node4.aggregatedDocId = randomInt(); - dim2Node4.nodeType = randomByte(); + root.nodeType = (byte) 0; dim2Node4.children = null; dim1Node1.children.put(3L, dim2Node1); From ad3cc65e274435a5d41ecc3dd487ccfffd975f2f Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Thu, 25 Jul 2024 12:13:26 +0530 Subject: [PATCH 3/3] refactored packages, minor fixes Signed-off-by: Sarthak Aggarwal --- .../composite/Composite99DocValuesReader.java | 8 +- .../fileformats}/meta/MetricEntry.java | 2 +- .../fileformats}/meta/StarTreeMetadata.java | 2 +- .../fileformats}/meta/package-info.java | 2 +- .../startree/fileformats/package-info.java | 14 ++++ .../writer}/StarTreeDataWriter.java | 23 +++--- .../writer}/StarTreeMetaWriter.java | 6 +- .../fileformats/writer/StarTreeWriter.java | 74 +++++++++++++++++++ .../fileformats/writer/package-info.java | 14 ++++ .../datacube/startree/package-info.java | 2 +- .../CompositeIndexConstants.java | 2 +- .../startree/builder/BaseStarTreeBuilder.java | 72 ++++++------------ .../node/FixedLengthStarTreeNode.java | 9 --- .../InMemoryTreeNode.java} | 6 +- .../datacube/startree/node/StarTree.java | 8 +- .../startree/utils/StarTreeUtils.java | 50 ------------- .../builder/AbstractStarTreeBuilderTests.java | 14 ++-- .../startree/meta/StarTreeMetaTests.java | 10 ++- .../node/FixedLengthStarTreeNodeTests.java | 29 ++++---- 19 files changed, 183 insertions(+), 164 deletions(-) rename server/src/main/java/org/opensearch/index/{compositeindex/datacube/startree => codec/composite/datacube/startree/fileformats}/meta/MetricEntry.java (90%) rename server/src/main/java/org/opensearch/index/{compositeindex/datacube/startree => codec/composite/datacube/startree/fileformats}/meta/StarTreeMetadata.java (98%) rename server/src/main/java/org/opensearch/index/{compositeindex/datacube/startree => codec/composite/datacube/startree/fileformats}/meta/package-info.java (77%) create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/package-info.java rename server/src/main/java/org/opensearch/index/{compositeindex/datacube/startree/utils => codec/composite/datacube/startree/fileformats/writer}/StarTreeDataWriter.java (81%) rename server/src/main/java/org/opensearch/index/{compositeindex/datacube/startree/utils => codec/composite/datacube/startree/fileformats/writer}/StarTreeMetaWriter.java (97%) create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeWriter.java create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/package-info.java rename server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/{utils/TreeNode.java => node/InMemoryTreeNode.java} (92%) diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java index 795af2cd379e9..795ae8066579a 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java @@ -29,14 +29,14 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.MetricEntry; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.StarTreeMetadata; import org.opensearch.index.compositeindex.CompositeIndexMetadata; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.ReadDimension; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; -import org.opensearch.index.compositeindex.datacube.startree.meta.MetricEntry; -import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; import org.opensearch.index.compositeindex.datacube.startree.node.StarTree; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; import org.opensearch.index.compositeindex.datacube.startree.node.Tree; @@ -50,7 +50,7 @@ import java.util.Map; import java.util.Set; -import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; import static org.opensearch.index.compositeindex.CompositeIndexConstants.SEGMENT_DOCS_COUNT; import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues; @@ -129,7 +129,7 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r break; } else if (magicMarker < 0) { throw new CorruptIndexException("Unknown token encountered: " + magicMarker, metaIn); - } else if (MAGIC_MARKER != magicMarker) { + } else if (COMPOSITE_FIELD_MARKER != magicMarker) { logger.error("Invalid composite field magic marker"); throw new IOException("Invalid composite field magic marker"); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/MetricEntry.java similarity index 90% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java rename to server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/MetricEntry.java index 5d3717dc65670..ecc92f2eada83 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/MetricEntry.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/MetricEntry.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.compositeindex.datacube.startree.meta; +package org.opensearch.index.codec.composite.datacube.startree.fileformats.meta; import org.opensearch.index.compositeindex.datacube.MetricStat; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/StarTreeMetadata.java similarity index 98% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java rename to server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/StarTreeMetadata.java index 2019ef912ec04..4f05f5b144672 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/StarTreeMetadata.java @@ -5,7 +5,7 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -package org.opensearch.index.compositeindex.datacube.startree.meta; +package org.opensearch.index.codec.composite.datacube.startree.fileformats.meta; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/package-info.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/package-info.java similarity index 77% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/package-info.java rename to server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/package-info.java index 568cacea4b59a..314dd57864f20 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/package-info.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/package-info.java @@ -11,4 +11,4 @@ * * @opensearch.experimental */ -package org.opensearch.index.compositeindex.datacube.startree.meta; +package org.opensearch.index.codec.composite.datacube.startree.fileformats.meta; diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/package-info.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/package-info.java new file mode 100644 index 0000000000000..a8becb5a02dca --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * File formats for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.codec.composite.datacube.startree.fileformats; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeDataWriter.java similarity index 81% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java rename to server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeDataWriter.java index a637064d8b3e1..844921a05c47e 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeDataWriter.java @@ -6,11 +6,12 @@ * compatible open source license. */ -package org.opensearch.index.compositeindex.datacube.startree.utils; +package org.opensearch.index.codec.composite.datacube.startree.fileformats.writer; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; import java.io.IOException; import java.util.ArrayList; @@ -19,7 +20,7 @@ import java.util.List; import java.util.Queue; -import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; import static org.opensearch.index.compositeindex.datacube.startree.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; @@ -43,7 +44,7 @@ public class StarTreeDataWriter { * @return the total size in bytes of the serialized star-tree data * @throws IOException if an I/O error occurs while writing the star-tree data */ - public static long writeStarTree(IndexOutput indexOutput, TreeNode rootNode, int numNodes, String name) throws IOException { + public static long writeStarTree(IndexOutput indexOutput, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { long totalSizeInBytes = 0L; totalSizeInBytes += computeStarTreeDataHeaderByteSize(); totalSizeInBytes += (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES; @@ -79,7 +80,7 @@ public static int computeStarTreeDataHeaderByteSize() { * @throws IOException if an I/O error occurs while writing the header */ private static void writeStarTreeHeader(IndexOutput output, int numNodes) throws IOException { - output.writeLong(MAGIC_MARKER); + output.writeLong(COMPOSITE_FIELD_MARKER); output.writeInt(VERSION); output.writeInt(numNodes); } @@ -91,21 +92,23 @@ private static void writeStarTreeHeader(IndexOutput output, int numNodes) throws * @param rootNode the root node of the star-tree * @throws IOException if an I/O error occurs while writing the nodes */ - private static void writeStarTreeNodes(IndexOutput output, TreeNode rootNode) throws IOException { - Queue queue = new LinkedList<>(); + private static void writeStarTreeNodes(IndexOutput output, InMemoryTreeNode rootNode) throws IOException { + Queue queue = new LinkedList<>(); queue.add(rootNode); int currentNodeId = 0; while (!queue.isEmpty()) { - TreeNode node = queue.remove(); + InMemoryTreeNode node = queue.remove(); if (node.children == null || node.children.isEmpty()) { writeStarTreeNode(output, node, ALL, ALL); } else { // Sort all children nodes based on dimension value - List sortedChildren = new ArrayList<>(node.children.values()); - sortedChildren.sort(Comparator.comparingInt(TreeNode::getNodeType).thenComparingLong(TreeNode::getDimensionValue)); + List sortedChildren = new ArrayList<>(node.children.values()); + sortedChildren.sort( + Comparator.comparingInt(InMemoryTreeNode::getNodeType).thenComparingLong(InMemoryTreeNode::getDimensionValue) + ); int firstChildId = currentNodeId + queue.size() + 1; int lastChildId = firstChildId + sortedChildren.size() - 1; @@ -127,7 +130,7 @@ private static void writeStarTreeNodes(IndexOutput output, TreeNode rootNode) th * @param lastChildId the ID of the last child node * @throws IOException if an I/O error occurs while writing the node */ - private static void writeStarTreeNode(IndexOutput output, TreeNode node, int firstChildId, int lastChildId) throws IOException { + private static void writeStarTreeNode(IndexOutput output, InMemoryTreeNode node, int firstChildId, int lastChildId) throws IOException { output.writeInt(node.dimensionId); output.writeLong(node.dimensionValue); output.writeInt(node.startDocId); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeMetaWriter.java similarity index 97% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java rename to server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeMetaWriter.java index 994793914e079..c7a11b60a6a14 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaWriter.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeMetaWriter.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.compositeindex.datacube.startree.utils; +package org.opensearch.index.codec.composite.datacube.startree.fileformats.writer; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -20,7 +20,7 @@ import java.io.IOException; import java.util.List; -import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; /** @@ -84,7 +84,7 @@ private static void writeMetaHeader( String starTreeFieldName ) throws IOException { // magic marker for sanity - metaOut.writeLong(MAGIC_MARKER); + metaOut.writeLong(COMPOSITE_FIELD_MARKER); // version metaOut.writeVInt(VERSION); diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeWriter.java new file mode 100644 index 0000000000000..12d9cde18dab3 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeWriter.java @@ -0,0 +1,74 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.codec.composite.datacube.startree.fileformats.writer; + +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; + +import java.io.IOException; +import java.util.List; + +/** + * Util class for building star tree + * + * @opensearch.experimental + */ +public class StarTreeWriter { + + private StarTreeWriter() {} + + /** + * Write star tree to index output stream + * + * @param dataOut data index output + * @param rootNode root star-tree node + * @param numNodes number of nodes in the tree + * @param name name of the star-tree field + * @return total size of the three + * @throws IOException when star-tree data serialization fails + */ + public static long writeStarTree(IndexOutput dataOut, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { + return StarTreeDataWriter.writeStarTree(dataOut, rootNode, numNodes, name); + } + + /** + * Write star tree metadata to index output stream + * + * @param metaOut meta index output + * @param starTreeField star tree field + * @param writeState segment write state + * @param metricAggregatorInfos metric aggregator infos + * @param segmentAggregatedCount segment aggregated count + * @param dataFilePointer data file pointer + * @param dataFileLength data file length + * @throws IOException when star-tree data serialization fails + */ + public static void writeStarTreeMetadata( + IndexOutput metaOut, + StarTreeField starTreeField, + SegmentWriteState writeState, + List metricAggregatorInfos, + Integer segmentAggregatedCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + StarTreeMetaWriter.writeStarTreeMetadata( + metaOut, + starTreeField, + writeState, + metricAggregatorInfos, + segmentAggregatedCount, + dataFilePointer, + dataFileLength + ); + } + +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/package-info.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/package-info.java new file mode 100644 index 0000000000000..c1635b9a3aaf1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Writer package for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.codec.composite.datacube.startree.fileformats.writer; diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java index 67808ad51289a..27793a6d5615c 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java @@ -7,6 +7,6 @@ */ /** - * classes responsible for handling all star tree structures and operations as part of codec + * Classes responsible for handling all star tree structures and operations as part of codec */ package org.opensearch.index.codec.composite.datacube.startree; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java index 2c3cd06da13da..a8f2c76353966 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java @@ -16,7 +16,7 @@ public class CompositeIndexConstants { /** * The magic marker value used for sanity checks in the Composite Index implementation. */ - public static final long MAGIC_MARKER = 0xC0950513F1E1DL; // Composite Field + public static final long COMPOSITE_FIELD_MARKER = 0xC0950513F1E1DL; // Composite Field /** * The version of the Composite Index implementation. diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index 72e1152154f1d..fdacc9c5b2498 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -11,19 +11,16 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedNumericDocValuesWriterWrapper; -import org.apache.lucene.index.VectorEncoding; -import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.Counter; import org.apache.lucene.util.NumericUtils; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.writer.StarTreeWriter; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -32,10 +29,10 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; import org.opensearch.index.compositeindex.datacube.startree.aggregators.ValueAggregator; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; -import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.MapperService; @@ -44,7 +41,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -82,7 +78,7 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder { protected int numStarTreeNodes; protected final int maxLeafDocuments; - protected final TreeNode rootNode = getNewNode(); + protected final InMemoryTreeNode rootNode = getNewNode(); private final StarTreeField starTreeField; private final MapperService mapperService; @@ -179,7 +175,7 @@ public List getMetricReaders(SegmentWriteState stat FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField()); if (metricStat != MetricStat.COUNT) { if (metricFieldInfo == null) { - metricFieldInfo = getFieldInfo(metric.getField()); + metricFieldInfo = StarTreeUtils.getFieldInfo(metric.getField(), 1); } metricReader = new SequentialDocValuesIterator( fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo) @@ -217,7 +213,7 @@ public void build( String dimension = dimensionsSplitOrder.get(i).getField(); FieldInfo dimensionFieldInfo = writeState.fieldInfos.fieldInfo(dimension); if (dimensionFieldInfo == null) { - dimensionFieldInfo = getFieldInfo(dimension); + dimensionFieldInfo = StarTreeUtils.getFieldInfo(dimension, 0); } dimensionReaders[i] = new SequentialDocValuesIterator( fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo) @@ -278,10 +274,10 @@ public void build( private void serializeStarTree(int numSegmentStarTreeDocument) throws IOException { // serialize the star tree data long dataFilePointer = dataOut.getFilePointer(); - long totalStarTreeDataLength = StarTreeUtils.writeStarTree(dataOut, rootNode, numStarTreeNodes, starTreeField.getName()); + long totalStarTreeDataLength = StarTreeWriter.writeStarTree(dataOut, rootNode, numStarTreeNodes, starTreeField.getName()); // serialize the star tree meta - StarTreeUtils.writeStarTreeMetadata( + StarTreeWriter.writeStarTreeMetadata( metaOut, starTreeField, writeState, @@ -583,8 +579,6 @@ private static long getLong(Object metric, long identityMetricValue) { try { if (metric instanceof Long) { metricValue = (long) metric; - } else if (metric != null) { - metricValue = Long.parseLong(String.valueOf(metric)); } else { logger.debug("metric value is null, returning identity metric value for the aggregator"); return identityMetricValue; @@ -636,28 +630,6 @@ public StarTreeDocument reduceStarTreeDocuments(StarTreeDocument aggregatedDocum } } - private static FieldInfo getFieldInfo(String field) { - return new FieldInfo( - field, - 1, - false, - false, - false, - IndexOptions.NONE, - DocValuesType.SORTED_NUMERIC, - -1, - Collections.emptyMap(), - 0, - 0, - 0, - 0, - VectorEncoding.FLOAT32, - VectorSimilarityFunction.EUCLIDEAN, - false, - false - ); - } - /** * Adds a document to star-tree * @@ -674,9 +646,9 @@ private void appendToStarTree(StarTreeDocument starTreeDocument) throws IOExcept * * @return return new star-tree node */ - private TreeNode getNewNode() { + private InMemoryTreeNode getNewNode() { numStarTreeNodes++; - return new TreeNode(); + return new InMemoryTreeNode(); } /** @@ -687,7 +659,7 @@ private TreeNode getNewNode() { * @param endDocId end document id * @throws IOException throws an exception if we are unable to construct the tree */ - private void constructStarTree(TreeNode node, int startDocId, int endDocId) throws IOException { + private void constructStarTree(InMemoryTreeNode node, int startDocId, int endDocId) throws IOException { int childDimensionId = node.dimensionId + 1; if (childDimensionId == numDimensions) { @@ -696,7 +668,7 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro // Construct all non-star children nodes node.childDimensionId = childDimensionId; - Map children = constructNonStarNodes(startDocId, endDocId, childDimensionId); + Map children = constructNonStarNodes(startDocId, endDocId, childDimensionId); node.children = children; // Construct star-node if required @@ -705,7 +677,7 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro } // Further split on child nodes if required - for (TreeNode child : children.values()) { + for (InMemoryTreeNode child : children.values()) { if (child.endDocId - child.startDocId > maxLeafDocuments) { constructStarTree(child, child.startDocId, child.endDocId); } @@ -721,14 +693,14 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro * @return root node with non-star nodes constructed * @throws IOException throws an exception if we are unable to construct non-star nodes */ - private Map constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException { - Map nodes = new HashMap<>(); + private Map constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException { + Map nodes = new HashMap<>(); int nodeStartDocId = startDocId; Long nodeDimensionValue = getDimensionValue(startDocId, dimensionId); for (int i = startDocId + 1; i < endDocId; i++) { Long dimensionValue = getDimensionValue(i, dimensionId); if (Objects.equals(dimensionValue, nodeDimensionValue) == false) { - TreeNode child = getNewNode(); + InMemoryTreeNode child = getNewNode(); child.dimensionId = dimensionId; if (nodeDimensionValue == null) { child.dimensionValue = ALL; @@ -744,7 +716,7 @@ private Map constructNonStarNodes(int startDocId, int endDocId, nodeDimensionValue = dimensionValue; } } - TreeNode lastNode = getNewNode(); + InMemoryTreeNode lastNode = getNewNode(); lastNode.dimensionId = dimensionId; lastNode.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL; lastNode.startDocId = nodeStartDocId; @@ -762,8 +734,8 @@ private Map constructNonStarNodes(int startDocId, int endDocId, * @return root node with star nodes constructed * @throws IOException throws an exception if we are unable to construct non-star nodes */ - private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { - TreeNode starNode = getNewNode(); + private InMemoryTreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { + InMemoryTreeNode starNode = getNewNode(); starNode.dimensionId = dimensionId; starNode.dimensionValue = ALL; starNode.nodeType = StarTreeNodeType.STAR.getValue(); @@ -783,7 +755,7 @@ private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId * @return aggregated star-tree documents * @throws IOException throws an exception upon failing to create new aggregated docs based on star tree */ - private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException { + private StarTreeDocument createAggregatedDocs(InMemoryTreeNode node) throws IOException { StarTreeDocument aggregatedStarTreeDocument = null; if (node.children == null) { @@ -810,7 +782,7 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException // For non-leaf node if (node.children.containsKey((long) ALL)) { // If it has star child, use the star child aggregated document directly - for (TreeNode child : node.children.values()) { + for (InMemoryTreeNode child : node.children.values()) { if (child.nodeType == StarTreeNodeType.STAR.getValue()) { aggregatedStarTreeDocument = createAggregatedDocs(child); node.aggregatedDocId = child.aggregatedDocId; @@ -821,12 +793,12 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException } else { // If no star child exists, aggregate all aggregated documents from non-star children if (node.children.values().size() == 1) { - for (TreeNode child : node.children.values()) { + for (InMemoryTreeNode child : node.children.values()) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child)); node.aggregatedDocId = child.aggregatedDocId; } } else { - for (TreeNode child : node.children.values()) { + for (InMemoryTreeNode child : node.children.values()) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child)); } if (null == aggregatedStarTreeDocument) { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java index 4edb45762084d..8159d2039121c 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java @@ -47,15 +47,6 @@ public FixedLengthStarTreeNode(RandomAccessInput in, int nodeId) throws IOExcept firstChildId = getInt(FIRST_CHILD_ID_OFFSET); } - // output.writeInt(node.dimensionId); - // output.writeLong(node.dimensionValue); - // output.writeInt(node.startDocId); - // output.writeInt(node.endDocId); - // output.writeInt(node.aggregatedDocId); - // output.writeByte(node.nodeType); - // output.writeInt(firstChildId); - // output.writeInt(lastChildId); - private int getInt(int fieldOffset) throws IOException { return in.readInt(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java similarity index 92% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java rename to server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java index 42f0b921a7f07..7aa5406029ca8 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java @@ -5,7 +5,7 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -package org.opensearch.index.compositeindex.datacube.startree.utils; +package org.opensearch.index.compositeindex.datacube.startree.node; import org.opensearch.common.annotation.ExperimentalApi; @@ -21,7 +21,7 @@ * @opensearch.experimental */ @ExperimentalApi -public class TreeNode { +public class InMemoryTreeNode { /** * The dimension id for the dimension (field) associated with this star-tree node. @@ -61,7 +61,7 @@ public class TreeNode { /** * A map containing the child nodes of this star-tree node, keyed by their dimension id. */ - public Map children; + public Map children; public long getDimensionValue() { return dimensionValue; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java index a1eae64fa5f56..f2d4ce2fef83a 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java @@ -11,12 +11,12 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.RandomAccessInput; -import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; -import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeDataWriter; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.writer.StarTreeDataWriter; import java.io.IOException; -import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; /** @@ -31,7 +31,7 @@ public class StarTree implements Tree { public StarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOException { long magicMarker = data.readLong(); - if (MAGIC_MARKER != magicMarker) { + if (COMPOSITE_FIELD_MARKER != magicMarker) { logger.error("Invalid magic marker"); throw new IOException("Invalid magic marker"); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java index 85b63b403fcbb..dc155df4eafca 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java @@ -10,14 +10,10 @@ import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.store.IndexOutput; -import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; -import java.io.IOException; import java.util.Collections; import java.util.List; @@ -32,52 +28,6 @@ private StarTreeUtils() {} public static final int ALL = -1; - /** - * Write star tree to index output stream - * - * @param dataOut data index output - * @param rootNode root star-tree node - * @param numNodes number of nodes in the tree - * @param name name of the star-tree field - * @return total size of the three - * @throws IOException when star-tree data serialization fails - */ - public static long writeStarTree(IndexOutput dataOut, TreeNode rootNode, int numNodes, String name) throws IOException { - return StarTreeDataWriter.writeStarTree(dataOut, rootNode, numNodes, name); - } - - /** - * Write star tree metadata to index output stream - * - * @param metaOut meta index output - * @param starTreeField star tree field - * @param writeState segment write state - * @param metricAggregatorInfos metric aggregator infos - * @param segmentAggregatedCount segment aggregated count - * @param dataFilePointer data file pointer - * @param dataFileLength data file length - * @throws IOException when star-tree data serialization fails - */ - public static void writeStarTreeMetadata( - IndexOutput metaOut, - StarTreeField starTreeField, - SegmentWriteState writeState, - List metricAggregatorInfos, - Integer segmentAggregatedCount, - long dataFilePointer, - long dataFileLength - ) throws IOException { - StarTreeMetaWriter.writeStarTreeMetadata( - metaOut, - starTreeField, - writeState, - metricAggregatorInfos, - segmentAggregatedCount, - dataFilePointer, - dataFileLength - ); - } - /** * The suffix appended to dimension field names in the Star Tree index. */ diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index b9a5a97cf504d..7cc0f578c3fab 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -41,9 +41,9 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; -import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; import org.opensearch.index.mapper.ContentPath; import org.opensearch.index.mapper.DocumentMapper; import org.opensearch.index.mapper.Mapper; @@ -2051,13 +2051,13 @@ private static StarTreeField getStarTreeField(int maxLeafDocs) { return sf; } - private void traverseStarTree(TreeNode root, Map> dimValueToDocIdMap, boolean traverStarNodes) { - TreeNode starTree = root; + private void traverseStarTree(InMemoryTreeNode root, Map> dimValueToDocIdMap, boolean traverStarNodes) { + InMemoryTreeNode starTree = root; // Use BFS to traverse the star tree - Queue queue = new ArrayDeque<>(); + Queue queue = new ArrayDeque<>(); queue.add(starTree); int currentDimensionId = -1; - TreeNode starTreeNode; + InMemoryTreeNode starTreeNode; List docIds = new ArrayList<>(); while ((starTreeNode = queue.poll()) != null) { int dimensionId = starTreeNode.dimensionId; @@ -2076,9 +2076,9 @@ private void traverseStarTree(TreeNode root, Map> di dimValueToDocIdMap.put(dimensionId, map); if (starTreeNode.children != null && (!traverStarNodes || (starTreeNode.nodeType == StarTreeNodeType.STAR.getValue()))) { - Iterator childrenIterator = starTreeNode.children.values().iterator(); + Iterator childrenIterator = starTreeNode.children.values().iterator(); while (childrenIterator.hasNext()) { - TreeNode childNode = childrenIterator.next(); + InMemoryTreeNode childNode = childrenIterator.next(); queue.add(childNode); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java index c1a97f6cb26bd..36e37e452b3f1 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java @@ -23,6 +23,9 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.Version; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.MetricEntry; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.writer.StarTreeWriter; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -30,7 +33,6 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; -import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.CompositeMappedFieldType; import org.opensearch.test.OpenSearchTestCase; @@ -45,7 +47,7 @@ import java.util.List; import java.util.UUID; -import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE; @@ -146,7 +148,7 @@ public void test_starTreeMetadata() throws IOException { dataFilePointer = randomNonNegativeLong(); segmentDocumentCount = randomNonNegativeInt(); metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT); - StarTreeUtils.writeStarTreeMetadata( + StarTreeWriter.writeStarTreeMetadata( metaOut, starTreeField, writeState, @@ -157,7 +159,7 @@ public void test_starTreeMetadata() throws IOException { ); metaOut.close(); metaIn = directory.openInput("star-tree-metadata", IOContext.READONCE); - assertEquals(MAGIC_MARKER, metaIn.readLong()); + assertEquals(COMPOSITE_FIELD_MARKER, metaIn.readLong()); assertEquals(VERSION, metaIn.readVInt()); String compositeFieldName = metaIn.readString(); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java index 13ead0efd8c51..5d5f59b62a3b7 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java @@ -12,9 +12,8 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; -import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; -import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.writer.StarTreeWriter; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -43,9 +42,9 @@ public void setup() throws IOException { public void test_StarTreeNode() throws IOException { dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT); - Map levelOrderStarTreeNodeMap = new LinkedHashMap<>(); - TreeNode root = generateSampleTree(levelOrderStarTreeNodeMap); - long starTreeDataLength = StarTreeUtils.writeStarTree(dataOut, root, 7, "star-tree"); + Map levelOrderStarTreeNodeMap = new LinkedHashMap<>(); + InMemoryTreeNode root = generateSampleTree(levelOrderStarTreeNodeMap); + long starTreeDataLength = StarTreeWriter.writeStarTree(dataOut, root, 7, "star-tree"); // asserting on the actual length of the star tree data file assertEquals(starTreeDataLength, 247); @@ -85,7 +84,7 @@ public void test_StarTreeNode() throws IOException { } - private void assertStarTreeNode(StarTreeNode starTreeNode, TreeNode treeNode) throws IOException { + private void assertStarTreeNode(StarTreeNode starTreeNode, InMemoryTreeNode treeNode) throws IOException { assertEquals(starTreeNode.getDimensionId(), treeNode.dimensionId); assertEquals(starTreeNode.getDimensionValue(), treeNode.dimensionValue); assertEquals(starTreeNode.getStartDocId(), treeNode.startDocId); @@ -104,9 +103,9 @@ private void assertStarTreeNode(StarTreeNode starTreeNode, TreeNode treeNode) th } - private TreeNode generateSampleTree(Map levelOrderStarTreeNode) { + private InMemoryTreeNode generateSampleTree(Map levelOrderStarTreeNode) { // Create the root node - TreeNode root = new TreeNode(); + InMemoryTreeNode root = new InMemoryTreeNode(); root.dimensionId = 0; root.startDocId = 0; root.endDocId = 100; @@ -118,7 +117,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) levelOrderStarTreeNode.put(root.dimensionValue, root); // Create child nodes for dimension 1 - TreeNode dim1Node1 = new TreeNode(); + InMemoryTreeNode dim1Node1 = new InMemoryTreeNode(); dim1Node1.dimensionId = 1; dim1Node1.dimensionValue = 1; dim1Node1.startDocId = 0; @@ -128,7 +127,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) root.nodeType = (byte) 0; dim1Node1.children = new HashMap<>(); - TreeNode dim1Node2 = new TreeNode(); + InMemoryTreeNode dim1Node2 = new InMemoryTreeNode(); dim1Node2.dimensionId = 1; dim1Node2.dimensionValue = 2; dim1Node2.startDocId = 50; @@ -145,7 +144,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) levelOrderStarTreeNode.put(dim1Node2.dimensionValue, dim1Node2); // Create child nodes for dimension 2 - TreeNode dim2Node1 = new TreeNode(); + InMemoryTreeNode dim2Node1 = new InMemoryTreeNode(); dim2Node1.dimensionId = 2; dim2Node1.dimensionValue = 3; dim2Node1.startDocId = 0; @@ -155,7 +154,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) root.nodeType = (byte) 0; dim2Node1.children = null; - TreeNode dim2Node2 = new TreeNode(); + InMemoryTreeNode dim2Node2 = new InMemoryTreeNode(); dim2Node2.dimensionId = 2; dim2Node2.dimensionValue = 4; dim2Node2.startDocId = 25; @@ -165,7 +164,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) root.nodeType = (byte) 0; dim2Node2.children = null; - TreeNode dim2Node3 = new TreeNode(); + InMemoryTreeNode dim2Node3 = new InMemoryTreeNode(); dim2Node3.dimensionId = 2; dim2Node3.dimensionValue = 5; dim2Node3.startDocId = 50; @@ -175,7 +174,7 @@ private TreeNode generateSampleTree(Map levelOrderStarTreeNode) root.nodeType = (byte) 0; dim2Node3.children = null; - TreeNode dim2Node4 = new TreeNode(); + InMemoryTreeNode dim2Node4 = new InMemoryTreeNode(); dim2Node4.dimensionId = 2; dim2Node4.dimensionValue = 6; dim2Node4.startDocId = 75;