Skip to content

Commit

Permalink
Add Star Tree unsigned-long indexing changes (#17156)
Browse files Browse the repository at this point in the history
Signed-off-by: Shailesh Singh <shaileshkumarsingh260@gmail.com>
  • Loading branch information
Shailesh-Kumar-Singh authored Jan 28, 2025
1 parent 5e12737 commit 8ec93ae
Show file tree
Hide file tree
Showing 30 changed files with 1,226 additions and 251 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.startObject()
.field("name", "keyword_dv")
.endObject()
.startObject()
.field("name", "unsignedLongDimension") // UnsignedLongDimension
.endObject()
.endArray()
.startArray("metrics")
.startObject()
Expand Down Expand Up @@ -117,6 +120,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "wildcard")
.field("doc_values", false)
.endObject()
.startObject("unsignedLongDimension")
.field("type", "unsigned_long")
.field("doc_values", true)
.endObject()
.endObject()
.endObject();
} catch (IOException e) {
Expand Down Expand Up @@ -605,8 +612,11 @@ public void testValidCompositeIndex() {
for (int i = 0; i < dateDim.getSortedCalendarIntervals().size(); i++) {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals(4, starTreeFieldType.getDimensions().size());
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("keyword_dv", starTreeFieldType.getDimensions().get(2).getField());
assertEquals("unsignedLongDimension", starTreeFieldType.getDimensions().get(3).getField());

assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.opensearch.index.compositeindex.CompositeIndexMetadata;
import org.opensearch.index.compositeindex.datacube.Metric;
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.DimensionConfig;
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata;
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
Expand Down Expand Up @@ -157,15 +158,15 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput);
compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata);

Map<String, DocValuesType> dimensionFieldToDocValuesMap = starTreeMetadata.getDimensionFields();
Map<String, DimensionConfig> dimensionFieldToDocValuesMap = starTreeMetadata.getDimensionFields();
// generating star tree unique fields (fully qualified name for dimension and metrics)
for (Map.Entry<String, DocValuesType> dimensionEntry : dimensionFieldToDocValuesMap.entrySet()) {
for (Map.Entry<String, DimensionConfig> dimensionEntry : dimensionFieldToDocValuesMap.entrySet()) {
String dimName = fullyQualifiedFieldNameForStarTreeDimensionsDocValues(
compositeFieldName,
dimensionEntry.getKey()
);
fields.add(dimName);
dimensionFieldTypeMap.put(dimName, dimensionEntry.getValue());
dimensionFieldTypeMap.put(dimName, dimensionEntry.getValue().getDocValuesType());
}
// adding metric fields
for (Metric metric : starTreeMetadata.getMetrics()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.core.xcontent.ToXContent;

import java.util.Comparator;
import java.util.List;
import java.util.function.Consumer;

Expand All @@ -34,8 +35,8 @@ public interface Dimension extends ToXContent {
/**
* Sets the dimension values with the consumer
*
* @param value The value to be set
* @param dimSetter Consumer which sets the dimensions
* @param value The value to be set
* @param dimSetter Consumer which sets the dimensions
*/
void setDimensionValues(final Long value, final Consumer<Long> dimSetter);

Expand All @@ -45,4 +46,19 @@ public interface Dimension extends ToXContent {
List<String> getSubDimensionNames();

DocValuesType getDocValuesType();

/**
* Returns the dimensionDataType used for comparing and parsing dimension values. <br>
* This determines how numeric values are compared and parsed: <br>
* - DimensionDataType.UNSIGNED_LONG for unsigned long values <br>
* - DimensionDataType.LONG for all other numeric types (DEFAULT)
*/
default DimensionDataType getDimensionDataType() {
return DimensionDataType.LONG;
}

default Comparator<Long> comparator() {
return (a, b) -> getDimensionDataType().compare(a, b);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.compositeindex.datacube;

import org.opensearch.common.annotation.ExperimentalApi;

/**
* Represents the data type of the dimension value.
*
* @opensearch.experimental
*/
@ExperimentalApi
public enum DimensionDataType {
LONG {
@Override
int compare(Long a, Long b) {
if (a == null && b == null) {
return 0;
}
if (b == null) {
return -1;
}
if (a == null) {
return 1;
}
return Long.compare(a, b);
}
},
UNSIGNED_LONG {
@Override
int compare(Long a, Long b) {
if (a == null && b == null) {
return 0;
}
if (b == null) {
return -1;
}
if (a == null) {
return 1;
}
return Long.compareUnsigned(a, b);
}
};

abstract int compare(Long a, Long b);
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ public static Dimension parseAndCreateDimension(
return parseAndCreateDateDimension(name, dimensionMap, c);
case NumericDimension.NUMERIC:
return new NumericDimension(name);
case UnsignedLongDimension.UNSIGNED_LONG:
return new UnsignedLongDimension(name);
case ORDINAL:
return new OrdinalDimension(name);
case IP:
Expand Down Expand Up @@ -72,6 +74,8 @@ public static Dimension parseAndCreateDimension(
return parseAndCreateDateDimension(name, dimensionMap, c);
case NUMERIC:
return new NumericDimension(name);
case UNSIGNED_LONG:
return new UnsignedLongDimension(name);
case ORDINAL:
return new OrdinalDimension(name);
case IP:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ public enum DimensionType {
*/
NUMERIC,

/**
* Represents an unsigned long dimension type.
* This is used for dimensions that contain numerical values of type unsigned long.
*/
UNSIGNED_LONG,

/**
* Represents a date dimension type.
* This is used for dimensions that contain date or timestamp values.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,24 @@ public class ReadDimension implements Dimension {
public static final String READ = "read";
private final String field;
private final DocValuesType docValuesType;
private final DimensionDataType dimensionDataType;

public ReadDimension(String field) {
this.field = field;
this.docValuesType = DocValuesType.SORTED_NUMERIC;
this.dimensionDataType = DimensionDataType.LONG;
}

public ReadDimension(String field, DocValuesType docValuesType) {
this.field = field;
this.docValuesType = docValuesType;
this.dimensionDataType = DimensionDataType.LONG;
}

public ReadDimension(String field, DocValuesType docValuesType, DimensionDataType dimensionDataType) {
this.field = field;
this.docValuesType = docValuesType;
this.dimensionDataType = dimensionDataType;
}

public String getField() {
Expand Down Expand Up @@ -82,4 +91,10 @@ public boolean equals(Object o) {
public int hashCode() {
return Objects.hash(field);
}

@Override
public DimensionDataType getDimensionDataType() {
return dimensionDataType;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.compositeindex.datacube;

import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.mapper.CompositeDataCubeFieldType;

import java.io.IOException;

/**
* Unsigned Long dimension class
*
* @opensearch.experimental
*/
public class UnsignedLongDimension extends NumericDimension {

public static final String UNSIGNED_LONG = "unsigned_long";

public UnsignedLongDimension(String field) {
super(field);
}

@Override
public DimensionDataType getDimensionDataType() {
return DimensionDataType.UNSIGNED_LONG;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(CompositeDataCubeFieldType.NAME, getField());
builder.field(CompositeDataCubeFieldType.TYPE, UNSIGNED_LONG);
builder.endObject();
return builder;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
Expand Down Expand Up @@ -112,6 +113,8 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder {
// This should be true for merge flows
protected boolean isMerge = false;

protected final List<Comparator<Long>> dimensionComparators = new ArrayList<>();

/**
* Reads all the configuration related to dimensions and metrics, builds a star-tree based on the different construction parameters.
*
Expand All @@ -136,6 +139,9 @@ protected BaseStarTreeBuilder(
int numDims = 0;
for (Dimension dim : starTreeField.getDimensionsOrder()) {
numDims += dim.getNumSubDimensions();
for (int i = 0; i < dim.getNumSubDimensions(); i++) {
dimensionComparators.add(dim.comparator());
}
dimensionsSplitOrder.add(dim);
}
this.numDimensions = numDims;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ private Iterator<StarTreeDocument> sortAndReduceDocuments(int[] sortedDocIds, in
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
}, dimensionComparators);
} catch (UncheckedIOException ex) {
// Unwrap UncheckedIOException and throw as IOException
if (ex.getCause() != null) {
Expand Down Expand Up @@ -308,6 +308,7 @@ public List<StarTreeDocument> getStarTreeDocuments() throws IOException {
@Override
public Long getDimensionValue(int docId, int dimensionId) throws IOException {
return starTreeDocumentFileManager.getDimensionValue(docId, dimensionId);

}

/**
Expand All @@ -334,7 +335,8 @@ public Iterator<StarTreeDocument> generateStarTreeDocumentsForStarNode(int start
} catch (IOException e) {
throw new RuntimeException(e);
}
});
}, dimensionComparators);

// Create an iterator for aggregated documents
return new Iterator<StarTreeDocument>() {
boolean hasNext = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ public class OnHeapStarTreeBuilder extends BaseStarTreeBuilder {
/**
* Constructor for OnHeapStarTreeBuilder
*
* @param metaOut an index output to write star-tree metadata
* @param dataOut an index output to write star-tree data
* @param metaOut an index output to write star-tree metadata
* @param dataOut an index output to write star-tree data
* @param starTreeField star-tree field
* @param segmentWriteState segment write state
* @param mapperService helps with the numeric type of field
Expand Down Expand Up @@ -82,9 +82,8 @@ public Long getDimensionValue(int docId, int dimensionId) {
* Sorts and aggregates all the documents of the segment based on dimension and metrics configuration
*
* @param dimensionReaders List of docValues readers to read dimensions from the segment
* @param metricReaders List of docValues readers to read metrics from the segment
* @param metricReaders List of docValues readers to read metrics from the segment
* @return Iterator of star-tree documents
*
*/
@Override
public Iterator<StarTreeDocument> sortAndAggregateSegmentDocuments(
Expand Down Expand Up @@ -161,7 +160,7 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List<StarTreeValues> starTreeVal
Iterator<StarTreeDocument> sortAndAggregateStarTreeDocuments(StarTreeDocument[] starTreeDocuments, boolean isMerge) {

// sort all the documents
sortStarTreeDocumentsFromDimensionId(starTreeDocuments, 0);
sortStarTreeDocumentsFromDimensionId(starTreeDocuments, -1);

// merge the documents
return mergeStarTreeDocuments(starTreeDocuments, isMerge);
Expand Down Expand Up @@ -222,7 +221,7 @@ public Iterator<StarTreeDocument> generateStarTreeDocumentsForStarNode(int start
}

// sort star tree documents from given dimension id (as previous dimension ids have already been processed)
sortStarTreeDocumentsFromDimensionId(starTreeDocuments, dimensionId + 1);
sortStarTreeDocumentsFromDimensionId(starTreeDocuments, dimensionId);

return new Iterator<StarTreeDocument>() {
boolean hasNext = true;
Expand Down Expand Up @@ -267,22 +266,13 @@ public StarTreeDocument next() {
* Sorts the star-tree documents from the given dimension id
*
* @param starTreeDocuments star-tree documents
* @param dimensionId id of the dimension
* @param dimensionId id of the dimension
*/
private void sortStarTreeDocumentsFromDimensionId(StarTreeDocument[] starTreeDocuments, int dimensionId) {
Arrays.sort(starTreeDocuments, (o1, o2) -> {
for (int i = dimensionId; i < numDimensions; i++) {
if (!Objects.equals(o1.dimensions[i], o2.dimensions[i])) {
if (o1.dimensions[i] == null && o2.dimensions[i] == null) {
return 0;
}
if (o1.dimensions[i] == null) {
return 1;
}
if (o2.dimensions[i] == null) {
return -1;
}
return Long.compare(o1.dimensions[i], o2.dimensions[i]);
Arrays.sort(starTreeDocuments, (doc1, doc2) -> {
for (int i = dimensionId + 1; i < numDimensions; i++) {
if (!Objects.equals(doc1.dimensions[i], doc2.dimensions[i])) {
return dimensionComparators.get(i).compare(doc1.dimensions[i], doc2.dimensions[i]);
}
}
return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ public class StarTreeWriter {
/** Initial version for the star tree writer */
public static final int VERSION_START = 0;

/** Version for the star tree writer with updated metadata which handles unsigned long */
public static final int VERSION_DIMENSION_DATA_TYPE = 1;

/** Current version for the star tree writer */
public static final int VERSION_CURRENT = VERSION_START;
public static final int VERSION_CURRENT = VERSION_DIMENSION_DATA_TYPE;

public StarTreeWriter() {}

Expand Down
Loading

0 comments on commit 8ec93ae

Please sign in to comment.